jasper-recall 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/SKILL.md CHANGED
@@ -1,13 +1,15 @@
1
1
  ---
2
2
  name: jasper-recall
3
- version: 0.2.1
4
- description: Local RAG system for agent memory using ChromaDB and sentence-transformers. Provides semantic search over session logs, daily notes, and memory files. v0.2.1 adds HTTP server for Docker-isolated agents. Commands: recall, index-digests, digest-sessions, privacy-check, sync-shared, serve.
3
+ version: 0.2.3
4
+ description: Local RAG system for agent memory using ChromaDB and sentence-transformers. Provides semantic search over session logs, daily notes, and memory files. v0.2.3 adds shared ChromaDB collections for multi-agent memory isolation. Commands: recall, index-digests, digest-sessions, privacy-check, sync-shared, serve.
5
5
  ---
6
6
 
7
- # Jasper Recall v0.2.1
7
+ # Jasper Recall v0.2.3
8
8
 
9
9
  Local RAG (Retrieval-Augmented Generation) system for AI agent memory. Gives your agent the ability to remember and search past conversations.
10
10
 
11
+ **New in v0.2.2:** Shared ChromaDB Collections — separate collections for private, shared, and learnings content. Better isolation for multi-agent setups.
12
+
11
13
  **New in v0.2.1:** Recall Server — HTTP API for Docker-isolated agents that can't run CLI directly.
12
14
 
13
15
  **New in v0.2.0:** Shared Agent Memory — bidirectional learning between main and sandboxed agents with privacy controls.
@@ -111,7 +113,7 @@ Schedule regular indexing:
111
113
  }
112
114
  ```
113
115
 
114
- ## Shared Agent Memory (v0.2.0)
116
+ ## Shared Agent Memory (v0.2.0+)
115
117
 
116
118
  For multi-agent setups where sandboxed agents need access to some memories:
117
119
 
@@ -125,6 +127,41 @@ This is visible to all agents.
125
127
 
126
128
  ## 2026-02-05 [private] - Personal note
127
129
  This is main agent only (default if untagged).
130
+
131
+ ## 2026-02-05 [learning] - Pattern discovered
132
+ Learnings shared bidirectionally between agents.
133
+ ```
134
+
135
+ ### ChromaDB Collections (v0.2.2+)
136
+
137
+ Memory is stored in separate collections for isolation:
138
+
139
+ | Collection | Purpose | Who accesses |
140
+ |------------|---------|--------------|
141
+ | `private_memories` | Main agent's private content | Main agent only |
142
+ | `shared_memories` | [public] tagged content | Sandboxed agents |
143
+ | `agent_learnings` | Learnings from any agent | All agents |
144
+ | `jasper_memory` | Legacy unified (backward compat) | Fallback |
145
+
146
+ **Collection selection:**
147
+ ```bash
148
+ # Main agent (default) - searches private_memories, shared_memories, and agent_learnings (merged results)
149
+ recall "api design"
150
+
151
+ # Sandboxed agents - searches shared_memories and agent_learnings only
152
+ recall "product info" --public-only
153
+
154
+ # Search learnings only
155
+ recall "patterns" --collection learnings
156
+
157
+ # Search all collections (merged results)
158
+ recall "everything" --collection all
159
+
160
+ # Specific collection
161
+ recall "something" --collection private
162
+
163
+ # Legacy mode (single collection)
164
+ recall "old way" --collection legacy
128
165
  ```
129
166
 
130
167
  ### Sandboxed Agent Access
@@ -159,8 +196,12 @@ recall "query" [OPTIONS]
159
196
  Options:
160
197
  -n, --limit N Number of results (default: 5)
161
198
  --json Output as JSON
162
- -v, --verbose Show similarity scores
163
- --public-only Only search shared/public content (v0.2.0+)
199
+ -v, --verbose Show similarity scores and collection source
200
+ --public-only Search shared_memories and agent_learnings only (sandboxed agents)
201
+ -c learnings Search agent_learnings only
202
+ -c all Search private, shared, and learnings collections (merged results)
203
+ -c, --collection X Search one collection: private, shared, learnings, all, or legacy
204
+ -c legacy Use legacy jasper_memory collection
164
205
  ```
165
206
 
166
207
  ### serve (v0.2.1+)
package/cli/config.js ADDED
@@ -0,0 +1,145 @@
1
+ /**
2
+ * Configuration management for jasper-recall
3
+ *
4
+ * Priority: ENV vars > config file > defaults
5
+ * Config file: ~/.jasper-recall/config.json
6
+ */
7
+
8
+ const fs = require('fs');
9
+ const path = require('path');
10
+ const os = require('os');
11
+
12
+ const CONFIG_DIR = path.join(os.homedir(), '.jasper-recall');
13
+ const CONFIG_FILE = path.join(CONFIG_DIR, 'config.json');
14
+
15
+ const DEFAULTS = { // Built-in fallbacks, used by get() when neither an env var nor the config file supplies a key
16
+ workspace: path.join(os.homedir(), '.openclaw', 'workspace'), // env override in get(): RECALL_WORKSPACE
17
+ chromaDb: path.join(os.homedir(), '.openclaw', 'chroma-db'), // env override in get(): RECALL_CHROMA_DB
18
+ venv: path.join(os.homedir(), '.openclaw', 'rag-env'), // env override in get(): RECALL_VENV
19
+ serverPort: 3458, // env override in get(): RECALL_PORT
20
+ serverHost: '127.0.0.1', // env override in get(): RECALL_HOST
21
+ publicOnly: true, // Default for API access; env override in get(): RECALL_PUBLIC_ONLY
22
+ memoryPaths: ['memory/'], // no env mapping in get(); config file or this default only
23
+ sharedMemoryPath: 'memory/shared/' // no env mapping in get(); config file or this default only
24
+ };
25
+
26
/**
 * Load the user's config file (CONFIG_FILE).
 *
 * A missing file is not an error and yields `{}` silently. Any other read
 * failure, malformed JSON, or a JSON document that is not a plain object
 * (`null`, an array, a string, ...) is reported on stderr and also yields
 * `{}`, so callers can always perform key lookups on the result.
 *
 * @returns {Object} The parsed config object, or `{}` when unavailable/invalid.
 */
function loadConfigFile() {
  let raw;
  try {
    // Read directly instead of existsSync() + read: avoids a check-then-use race.
    raw = fs.readFileSync(CONFIG_FILE, 'utf8');
  } catch (err) {
    // "No such file" simply means nothing has been configured yet.
    if (err.code !== 'ENOENT' && err.code !== 'ENOTDIR') {
      console.error(`Warning: Could not load config from ${CONFIG_FILE}:`, err.message);
    }
    return {};
  }

  try {
    const parsed = JSON.parse(raw);
    const isPlainObject =
      parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    if (isPlainObject) {
      return parsed;
    }
    // Valid JSON but not an object: callers use `key in config`, which would throw.
    console.error(
      `Warning: Could not load config from ${CONFIG_FILE}:`,
      'expected a JSON object'
    );
  } catch (err) {
    console.error(`Warning: Could not load config from ${CONFIG_FILE}:`, err.message);
  }
  return {};
}
40
+
41
// Environment variable that overrides each config key. Keys without an entry
// (e.g. memoryPaths) have no environment override.
const ENV_VAR_BY_KEY = {
  workspace: 'RECALL_WORKSPACE',
  chromaDb: 'RECALL_CHROMA_DB',
  venv: 'RECALL_VENV',
  serverPort: 'RECALL_PORT',
  serverHost: 'RECALL_HOST',
  publicOnly: 'RECALL_PUBLIC_ONLY'
};

/**
 * Read and coerce the environment override for `key`.
 *
 * The value is coerced according to the type of the key's default, so a
 * path such as "123" stays a string while a port becomes an integer.
 * Invalid values are reported on stderr and ignored.
 *
 * @param {string} key - Config key.
 * @returns {*} The coerced override, or `undefined` when unset/empty/invalid.
 */
function readEnvOverride(key) {
  if (!Object.prototype.hasOwnProperty.call(ENV_VAR_BY_KEY, key)) {
    return undefined;
  }
  const envKey = ENV_VAR_BY_KEY[key];
  const raw = process.env[envKey];
  if (!raw) {
    return undefined; // unset or empty string
  }

  const expectedType = typeof DEFAULTS[key];
  if (expectedType === 'boolean') {
    const flag = raw.trim().toLowerCase();
    if (flag === 'true' || flag === '1') return true;
    if (flag === 'false' || flag === '0') return false;
  } else if (expectedType === 'number') {
    const digits = raw.trim();
    // Strict decimal check: rejects "", "1e3", "0x10", "1.5" instead of
    // letting parseInt silently truncate them (or return NaN).
    if (/^\d+$/.test(digits)) return Number.parseInt(digits, 10);
  } else {
    return raw;
  }

  console.error(`Warning: Ignoring invalid value for ${envKey}: ${raw}`);
  return undefined;
}

/**
 * Resolve one key against an already-loaded file config.
 *
 * @param {string} key - Config key.
 * @param {*} fileConfig - Result of loadConfigFile(); non-objects are ignored.
 * @returns {*} Resolved value with priority ENV > file > default.
 */
function resolveConfigValue(key, fileConfig) {
  const fromEnv = readEnvOverride(key);
  if (fromEnv !== undefined) {
    return fromEnv;
  }

  // Own-property checks so keys like "constructor" never resolve to
  // something inherited from Object.prototype.
  const hasOwn = (obj) =>
    obj !== null &&
    typeof obj === 'object' &&
    Object.prototype.hasOwnProperty.call(obj, key);

  if (!Array.isArray(fileConfig) && hasOwn(fileConfig)) {
    return fileConfig[key];
  }
  return hasOwn(DEFAULTS) ? DEFAULTS[key] : undefined;
}

/**
 * Get config value with priority: ENV > file > default
 *
 * @param {string} key - Config key (e.g. 'workspace', 'serverPort').
 * @returns {*} The resolved value, or `undefined` for an unknown key.
 */
function get(key) {
  return resolveConfigValue(key, loadConfigFile());
}

/**
 * Get all config
 *
 * Starts from DEFAULTS, overlays every key found in the config file, then
 * resolves each known key through the same ENV > file > default rules as
 * get(). The config file is read exactly once.
 *
 * @returns {Object} A new object holding the effective configuration.
 */
function getAll() {
  const loaded = loadConfigFile();
  const fileConfig =
    loaded !== null && typeof loaded === 'object' && !Array.isArray(loaded)
      ? loaded
      : {};
  const config = { ...DEFAULTS, ...fileConfig };

  // Override with env vars
  for (const key of Object.keys(DEFAULTS)) {
    config[key] = resolveConfigValue(key, fileConfig);
  }

  return config;
}
90
+
91
/**
 * Save config to file
 *
 * Creates CONFIG_DIR if needed and writes `config` to CONFIG_FILE as
 * pretty-printed JSON, replacing any previous content.
 *
 * @param {Object} config - Plain object to persist.
 * @throws {TypeError} If `config` is not a plain object. Writing e.g. `null`
 *   would produce a file that readers cannot do key lookups on.
 */
function save(config) {
  if (config === null || typeof config !== 'object' || Array.isArray(config)) {
    throw new TypeError('Config must be a plain object');
  }

  // `recursive: true` is a no-op when the directory already exists, so no
  // separate (racy) existsSync() check is needed.
  fs.mkdirSync(CONFIG_DIR, { recursive: true });
  fs.writeFileSync(CONFIG_FILE, JSON.stringify(config, null, 2));
  console.log(`Config saved to ${CONFIG_FILE}`);
}
101
+
102
/**
 * Initialize config
 *
 * Builds a config from `options`, falling back to DEFAULTS for every value
 * that is missing or falsy, writes it with save(), and returns it. Nothing
 * is prompted for: the values come only from `options` and DEFAULTS.
 *
 * NOTE(review): the object passed to save() holds only the four keys below,
 * so an existing config file is replaced rather than merged — confirm that
 * this is the intended behaviour of `config init`.
 *
 * @param {Object} [options] - Optional overrides; `null` is treated as `{}`.
 * @param {string} [options.workspace]
 * @param {string} [options.chromaDb]
 * @param {string} [options.venv]
 * @param {number} [options.serverPort]
 * @returns {Object} The config object that was saved.
 */
function init(options = {}) {
  // The default parameter only covers `undefined`; guard against `null` too.
  const overrides = options || {};
  const config = {
    workspace: overrides.workspace || DEFAULTS.workspace,
    chromaDb: overrides.chromaDb || DEFAULTS.chromaDb,
    venv: overrides.venv || DEFAULTS.venv,
    serverPort: overrides.serverPort || DEFAULTS.serverPort
  };

  save(config);
  return config;
}
116
+
117
/**
 * Show current config
 *
 * Prints the config file location, whether it exists, and every effective
 * setting from getAll() together with where the value came from:
 * `(env)`, `(file)` or `(default)`.
 */
function show() {
  // Must mirror the key -> environment variable mapping used by get().
  // Deriving the name from the key (RECALL_ + KEY) is wrong for most keys,
  // e.g. chromaDb -> RECALL_CHROMA_DB and serverPort -> RECALL_PORT.
  const envVarByKey = {
    workspace: 'RECALL_WORKSPACE',
    chromaDb: 'RECALL_CHROMA_DB',
    venv: 'RECALL_VENV',
    serverPort: 'RECALL_PORT',
    serverHost: 'RECALL_HOST',
    publicOnly: 'RECALL_PUBLIC_ONLY'
  };

  console.log('\nJasper Recall Configuration');
  console.log('===========================\n');
  console.log(`Config file: ${CONFIG_FILE}`);
  console.log(`Exists: ${fs.existsSync(CONFIG_FILE) ? 'yes' : 'no'}\n`);

  const config = getAll();

  // Read the file once instead of once per printed key.
  const loaded = loadConfigFile();
  const fileConfig = loaded !== null && typeof loaded === 'object' ? loaded : {};

  for (const [key, value] of Object.entries(config)) {
    const envVar = Object.prototype.hasOwnProperty.call(envVarByKey, key)
      ? envVarByKey[key]
      : undefined;

    let source = '(default)';
    if (envVar && process.env[envVar]) {
      source = '(env)';
    } else if (fileConfig[key] !== undefined) {
      source = '(file)';
    }
    console.log(` ${key}: ${value} ${source}`);
  }
  console.log('');
}
134
+
135
+ module.exports = {
136
+ CONFIG_DIR,
137
+ CONFIG_FILE,
138
+ DEFAULTS,
139
+ get,
140
+ getAll,
141
+ save,
142
+ init,
143
+ show,
144
+ loadConfigFile
145
+ };
@@ -15,7 +15,7 @@ const fs = require('fs');
15
15
  const path = require('path');
16
16
  const os = require('os');
17
17
 
18
- const VERSION = '0.2.3';
18
+ const VERSION = '0.2.4';
19
19
 
20
20
  // Check for updates in background (non-blocking)
21
21
  const { checkInBackground } = require('./update-check');
@@ -139,9 +139,20 @@ COMMANDS:
139
139
  index Index memory files (alias for index-digests)
140
140
  digest Process session logs (alias for digest-sessions)
141
141
  serve Start HTTP API server (for sandboxed agents)
142
+ config Show or set configuration
142
143
  update Check for updates
143
144
  help Show this help message
144
145
 
146
+ CONFIGURATION:
147
+ Config file: ~/.jasper-recall/config.json
148
+
149
+ Environment variables (override config file):
150
+ RECALL_WORKSPACE Memory workspace path
151
+ RECALL_CHROMA_DB ChromaDB storage path
152
+ RECALL_VENV Python venv path
153
+ RECALL_PORT Server port (default: 3458)
154
+ RECALL_HOST Server host (default: 127.0.0.1)
155
+
145
156
  EXAMPLES:
146
157
  npx jasper-recall setup
147
158
  recall "what did we discuss yesterday"
@@ -203,6 +214,18 @@ switch (command) {
203
214
  }
204
215
  });
205
216
  break;
217
+ case 'config':
218
+ // Configuration management
219
+ const config = require('./config');
220
+ const configArg = process.argv[3];
221
+ if (configArg === 'init') {
222
+ config.init();
223
+ } else if (configArg === 'path') {
224
+ console.log(config.CONFIG_FILE);
225
+ } else {
226
+ config.show();
227
+ }
228
+ break;
206
229
  case '--version':
207
230
  case '-v':
208
231
  console.log(VERSION);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "jasper-recall",
3
- "version": "0.2.3",
3
+ "version": "0.2.4",
4
4
  "description": "Local RAG system for AI agent memory using ChromaDB and sentence-transformers",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -2,6 +2,11 @@
2
2
  """
3
3
  Index markdown files into ChromaDB for RAG retrieval.
4
4
  Reads from memory/, session-digests/, repos/, and founder-logs/.
5
+
6
+ v0.3.0: Multi-collection architecture
7
+ - private_memories: main agent only (default)
8
+ - shared_memories: accessible to sandboxed agents
9
+ - agent_learnings: insights from agent interactions (moltbook, etc.)
5
10
  """
6
11
 
7
12
  import os
@@ -56,8 +61,96 @@ def get_file_hash(content: str) -> str:
56
61
  return hashlib.md5(content.encode()).hexdigest()
57
62
 
58
63
 
64
def determine_collection(rel_path: str, content: str) -> str:
    """
    Determine which collection a file belongs to based on path and content.

    Rules, in priority order:
      1. A directory named moltbook, learnings, agent-learnings or
         agent-insights anywhere in the path -> 'learnings'.
      2. A [learning] or [insight] tag in the content -> 'learnings'.
      3. A directory named shared anywhere in the path -> 'shared'.
      4. A [public] tag in the content -> 'shared'.
      5. Otherwise -> 'private'.

    Directory names are matched as whole path components (case-insensitive,
    '/' or '\\' separated), so 'unshared/' does not count as 'shared/'.

    Tag-based promotion (rules 2 and 4) is skipped when the content also
    contains a [private] tag: the whole file is indexed into one collection,
    so promoting a mixed file would expose its private sections.

    Args:
        rel_path: File path relative to the workspace.
        content: Full text of the file.

    Returns: 'private', 'shared', or 'learnings'
    """
    # Directory components only; the last component is the file name.
    components = rel_path.lower().replace('\\', '/').split('/')
    directories = {part for part in components[:-1] if part}

    content_lower = content.lower()
    has_private_tag = '[private]' in content_lower

    # Agent learnings: moltbook insights, agent collaboration notes.
    # 'agent-learnings' is listed explicitly because the previous substring
    # test on 'learnings/' matched it as well.
    if directories & {'moltbook', 'learnings', 'agent-learnings', 'agent-insights'}:
        return 'learnings'
    if not has_private_tag and ('[learning]' in content_lower or '[insight]' in content_lower):
        return 'learnings'

    # Shared: explicit shared folder or [public] tag
    if 'shared' in directories:
        return 'shared'
    if not has_private_tag and '[public]' in content_lower:
        return 'shared'

    # Default: private
    return 'private'
87
+
88
+
89
def index_to_collection(collection, model, filepath, rel_path, content, file_hash, stats):
    """Index a file's chunks into a specific collection.

    Existing chunks for ``rel_path`` are looked up in a single query. If every
    stored chunk already carries ``file_hash`` the file is counted as skipped;
    otherwise the old chunks are deleted and the file is re-chunked,
    embedded and added.

    Args:
        collection: Collection object providing get/delete/add.
        model: Embedding model; ``model.encode(chunks).tolist()`` is used.
        filepath: Path of the file (only its base name is stored).
        rel_path: Relative path, stored as ``source`` and used in chunk IDs.
        content: File text to chunk.
        file_hash: Hash of ``content`` used for change detection.
        stats: Dict with 'files', 'chunks' and 'skipped' counters (mutated).

    Returns:
        True if chunks were written, False if the file was skipped or
        produced no chunks.
    """
    import sys  # local import: only needed for the warning below

    filename = os.path.basename(filepath)

    # Fetch IDs and metadata together: one round trip instead of two.
    try:
        existing = collection.get(
            where={"source": rel_path},
            include=["metadatas"]
        )
    except Exception as exc:
        # Best effort: keep indexing, but do not hide the failure.
        print(f"Warning: lookup failed for {rel_path}: {exc}", file=sys.stderr)
        existing = {}

    existing_ids = existing.get('ids') or []
    if existing_ids:
        existing_metas = existing.get('metadatas') or []
        # Require every stored chunk to match, so a partially re-indexed file
        # is repaired instead of skipped.
        unchanged = bool(existing_metas) and all(
            (meta or {}).get('file_hash') == file_hash for meta in existing_metas
        )
        if unchanged:
            stats['skipped'] += 1
            return False

        # File changed, delete old chunks
        collection.delete(ids=existing_ids)

    # Chunk the content
    chunks = chunk_text(content)

    if not chunks:
        return False

    # Generate embeddings
    embeddings = model.encode(chunks).tolist()

    # Create IDs and metadata
    ids = [f"{rel_path}::{i}" for i in range(len(chunks))]
    metadatas = [
        {
            "source": rel_path,
            "chunk_index": i,
            "file_hash": file_hash,
            "filename": filename,
        }
        for i in range(len(chunks))
    ]

    # Add to collection
    collection.add(
        ids=ids,
        embeddings=embeddings,
        documents=chunks,
        metadatas=metadatas
    )

    stats['chunks'] += len(chunks)
    stats['files'] += 1
    return True
150
+
151
+
59
152
  def main():
60
- print("🦊 Jasper Recall — RAG Indexer")
153
+ print("🦊 Jasper Recall — RAG Indexer v0.3.0")
61
154
  print("=" * 40)
62
155
 
63
156
  # Check if memory dir exists
@@ -75,12 +168,30 @@ def main():
75
168
  os.makedirs(CHROMA_DIR, exist_ok=True)
76
169
  client = chromadb.PersistentClient(path=CHROMA_DIR)
77
170
 
78
- # Get or create collection
79
- collection = client.get_or_create_collection(
171
+ # Create collections with descriptions
172
+ collections = {
173
+ "private": client.get_or_create_collection(
174
+ name="private_memories",
175
+ metadata={"description": "Private agent memories - main agent only"}
176
+ ),
177
+ "shared": client.get_or_create_collection(
178
+ name="shared_memories",
179
+ metadata={"description": "Shared memories - accessible to sandboxed agents"}
180
+ ),
181
+ "learnings": client.get_or_create_collection(
182
+ name="agent_learnings",
183
+ metadata={"description": "Agent learnings and insights from interactions"}
184
+ ),
185
+ }
186
+
187
+ # Also maintain legacy collection for backwards compatibility
188
+ legacy_collection = client.get_or_create_collection(
80
189
  name="jasper_memory",
81
- metadata={"description": "Agent session digests and memory files"}
190
+ metadata={"description": "Legacy collection - use specific collections instead"}
82
191
  )
83
192
 
193
+ print(f"✓ Collections: private_memories, shared_memories, agent_learnings")
194
+
84
195
  # Gather files to index
85
196
  files_to_index = []
86
197
 
@@ -113,12 +224,23 @@ def main():
113
224
  files_to_index.extend(glob.glob(os.path.join(shared_dir, "*.md")))
114
225
  files_to_index.extend(glob.glob(os.path.join(shared_dir, "**/*.md"), recursive=True))
115
226
 
227
+ # Moltbook learnings
228
+ moltbook_dir = os.path.join(MEMORY_DIR, "shared", "moltbook")
229
+ if os.path.exists(moltbook_dir):
230
+ files_to_index.extend(glob.glob(os.path.join(moltbook_dir, "*.md")))
231
+
232
+ # Remove duplicates while preserving order
233
+ files_to_index = list(dict.fromkeys(files_to_index))
234
+
116
235
  print(f"Found {len(files_to_index)} files to index")
117
236
 
118
- # Track stats
119
- total_chunks = 0
120
- indexed_files = 0
121
- skipped_files = 0
237
+ # Track stats per collection
238
+ stats = {
239
+ "private": {"files": 0, "chunks": 0, "skipped": 0},
240
+ "shared": {"files": 0, "chunks": 0, "skipped": 0},
241
+ "learnings": {"files": 0, "chunks": 0, "skipped": 0},
242
+ "legacy": {"files": 0, "chunks": 0, "skipped": 0},
243
+ }
122
244
 
123
245
  for filepath in files_to_index:
124
246
  filename = os.path.basename(filepath)
@@ -134,69 +256,32 @@ def main():
134
256
  if not content.strip():
135
257
  continue
136
258
 
137
- # Check if already indexed with same hash
138
259
  file_hash = get_file_hash(content)
139
260
 
140
- # Check for existing chunks from this file
141
- existing = collection.get(
142
- where={"source": rel_path},
143
- include=[]
144
- )
261
+ # Determine target collection
262
+ coll_key = determine_collection(rel_path, content)
263
+ collection = collections[coll_key]
145
264
 
146
- if existing['ids']:
147
- # Check if hash matches (stored in first chunk's metadata)
148
- existing_meta = collection.get(
149
- ids=[existing['ids'][0]],
150
- include=["metadatas"]
151
- )
152
- if existing_meta['metadatas'] and existing_meta['metadatas'][0].get('file_hash') == file_hash:
153
- skipped_files += 1
154
- continue
155
-
156
- # File changed, delete old chunks
157
- collection.delete(ids=existing['ids'])
158
-
159
- # Chunk the content
160
- chunks = chunk_text(content)
161
-
162
- if not chunks:
163
- continue
164
-
165
- # Generate embeddings
166
- embeddings = model.encode(chunks).tolist()
167
-
168
- # Determine visibility (public if in shared/ or contains [public] tag)
169
- is_public = "shared/" in rel_path or "[public]" in content.lower()
170
- visibility = "public" if is_public else "private"
171
-
172
- # Create IDs and metadata
173
- ids = [f"{rel_path}::{i}" for i in range(len(chunks))]
174
- metadatas = [
175
- {
176
- "source": rel_path,
177
- "chunk_index": i,
178
- "file_hash": file_hash,
179
- "filename": filename,
180
- "visibility": visibility
181
- }
182
- for i in range(len(chunks))
183
- ]
265
+ # Index to the appropriate collection
266
+ indexed = index_to_collection(
267
+ collection, model, filepath, rel_path, content, file_hash, stats[coll_key]
268
+ )
184
269
 
185
- # Add to collection
186
- collection.add(
187
- ids=ids,
188
- embeddings=embeddings,
189
- documents=chunks,
190
- metadatas=metadatas
270
+ # Also index to legacy collection for backwards compatibility
271
+ index_to_collection(
272
+ legacy_collection, model, filepath, rel_path, content, file_hash, stats["legacy"]
191
273
  )
192
274
 
193
- total_chunks += len(chunks)
194
- indexed_files += 1
195
- print(f" ✓ {filename}: {len(chunks)} chunks")
275
+ if indexed:
276
+ print(f" ✓ {filename} → {coll_key} ({stats[coll_key]['chunks']} chunks)")
196
277
 
197
278
  print("=" * 40)
198
- print(f"✓ Indexed {indexed_files} files ({total_chunks} chunks)")
199
- print(f" Skipped {skipped_files} unchanged files")
279
+ print("✓ Indexing complete")
280
+ for key, s in stats.items():
281
+ if key == "legacy":
282
+ continue
283
+ if s['files'] > 0 or s['skipped'] > 0:
284
+ print(f" {key}: {s['files']} files ({s['chunks']} chunks), {s['skipped']} skipped")
200
285
  print(f" Database: {CHROMA_DIR}")
201
286
 
202
287
 
package/scripts/recall.py CHANGED
@@ -1,7 +1,13 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
3
  RAG recall: Search agent memory for relevant context.
4
- Usage: recall "query" [--limit N] [--json] [--verbose]
4
+ Usage: recall "query" [--limit N] [--json] [--verbose] [--collection NAME]
5
+
6
+ v0.3.0: Multi-collection support
7
+ - private_memories: main agent only (default for main agent)
8
+ - shared_memories: accessible to sandboxed agents
9
+ - agent_learnings: insights from agent interactions
10
+ - all: search all collections (main agent only)
5
11
  """
6
12
 
7
13
  import os
@@ -13,9 +19,16 @@ import json
13
19
  CHROMA_DIR = os.environ.get("RECALL_CHROMA_DB", os.path.expanduser("~/.openclaw/chroma-db"))
14
20
  VENV_PATH = os.environ.get("RECALL_VENV", os.path.expanduser("~/.openclaw/rag-env"))
15
21
 
22
+ # Collection names
23
+ COLLECTIONS = {
24
+ "private": "private_memories",
25
+ "shared": "shared_memories",
26
+ "learnings": "agent_learnings",
27
+ "legacy": "jasper_memory",
28
+ }
29
+
16
30
  # Activate the venv
17
31
  sys.path.insert(0, os.path.join(VENV_PATH, "lib/python3.12/site-packages"))
18
- # Also try python3.11, 3.10 for compatibility
19
32
  for pyver in ["python3.11", "python3.10"]:
20
33
  alt_path = os.path.join(VENV_PATH, f"lib/{pyver}/site-packages")
21
34
  if os.path.exists(alt_path):
@@ -30,13 +43,56 @@ except ImportError as e:
30
43
  sys.exit(1)
31
44
 
32
45
 
46
def search_collection(collection, query_embedding, limit):
    """Search a single collection and return results.

    The requested result count is capped at the collection's size, because
    asking for more results than there are entries can raise in some Chroma
    releases and would then drop the whole collection from the search.

    Args:
        collection: Collection object providing ``query`` (and ``count``).
        query_embedding: Embedding vector of the query.
        limit: Maximum number of results to request.

    Returns:
        The raw query result, or None when there is nothing to search
        (empty collection, non-positive limit) or the query failed. Failures
        are reported on stderr.
    """
    if limit <= 0:
        return None

    try:
        available = collection.count()
    except Exception:
        # Size unknown: fall back to the requested limit.
        available = None

    if available is not None:
        if available <= 0:
            return None
        limit = min(limit, available)

    try:
        return collection.query(
            query_embeddings=[query_embedding],
            n_results=limit,
            include=["documents", "metadatas", "distances"]
        )
    except Exception as exc:
        name = getattr(collection, "name", "unknown")
        print(f"Warning: search failed for collection '{name}': {exc}", file=sys.stderr)
        return None
57
+
58
+
59
def merge_results(all_results, limit):
    """Merge and sort results from multiple collections by similarity.

    Args:
        all_results: Mapping of collection key -> raw query result with
            'documents', 'metadatas' and 'distances', each holding one list
            per query. Entries that are empty or None are ignored.
        limit: Maximum number of merged items to return.

    Returns:
        A list of at most ``limit`` dicts with keys 'collection', 'document',
        'metadata', 'distance' and 'similarity' (computed as 1 - distance),
        ordered by descending similarity. 'metadata' is always a dict.
        An empty list is returned for a non-positive ``limit``.
    """
    # A negative limit would otherwise slice off the tail (merged[:-1]).
    if limit <= 0:
        return []

    def first_batch(results, field):
        # Results hold one list per query; only a single query is ever sent.
        batches = results.get(field) or [[]]
        return batches[0] or []

    merged = []

    for coll_name, results in all_results.items():
        if not results:
            continue

        documents = first_batch(results, 'documents')
        if not documents:
            continue
        distances = first_batch(results, 'distances')
        # Metadata may be missing entirely or None per item.
        metadatas = first_batch(results, 'metadatas') or [None] * len(documents)

        for doc, meta, dist in zip(documents, metadatas, distances):
            merged.append({
                "collection": coll_name,
                "document": doc,
                # Callers do item["metadata"].get(...), so never hand back None.
                "metadata": meta or {},
                "distance": dist,
                "similarity": 1 - dist
            })

    # Sort by similarity (descending)
    merged.sort(key=lambda item: item['similarity'], reverse=True)

    return merged[:limit]
84
+
85
+
33
86
  def main():
34
87
  parser = argparse.ArgumentParser(description="Search agent memory")
35
88
  parser.add_argument("query", help="Search query")
36
89
  parser.add_argument("-n", "--limit", type=int, default=5, help="Number of results (default: 5)")
37
90
  parser.add_argument("--json", action="store_true", help="Output as JSON")
38
91
  parser.add_argument("-v", "--verbose", action="store_true", help="Show similarity scores")
39
- parser.add_argument("--public-only", action="store_true", help="Only search public/shared content (for sandboxed agents)")
92
+ parser.add_argument("--public-only", action="store_true",
93
+ help="Only search shared content (for sandboxed agents)")
94
+ parser.add_argument("-c", "--collection", choices=["private", "shared", "learnings", "all", "legacy"],
95
+ default=None, help="Specific collection to search (default: all for main, shared for --public-only)")
40
96
  args = parser.parse_args()
41
97
 
42
98
  if not os.path.exists(CHROMA_DIR):
@@ -47,61 +103,57 @@ def main():
47
103
  model = SentenceTransformer('all-MiniLM-L6-v2')
48
104
  client = chromadb.PersistentClient(path=CHROMA_DIR)
49
105
 
50
- try:
51
- collection = client.get_collection("jasper_memory")
52
- except Exception:
53
- print("❌ Collection not found. Run 'index-digests' first.", file=sys.stderr)
54
- sys.exit(1)
106
+ # Determine which collections to search
107
+ if args.public_only:
108
+ # Sandboxed agents: only shared + learnings (public content)
109
+ if args.collection:
110
+ if args.collection not in ["shared", "learnings"]:
111
+ print(f"❌ --public-only restricts to 'shared' or 'learnings' collections", file=sys.stderr)
112
+ sys.exit(1)
113
+ search_collections = [args.collection]
114
+ else:
115
+ search_collections = ["shared", "learnings"]
116
+ elif args.collection:
117
+ if args.collection == "all":
118
+ search_collections = ["private", "shared", "learnings"]
119
+ else:
120
+ search_collections = [args.collection]
121
+ else:
122
+ # Default for main agent: search all collections
123
+ search_collections = ["private", "shared", "learnings"]
124
+
125
+ # Get collections
126
+ collections_to_query = {}
127
+ for coll_key in search_collections:
128
+ coll_name = COLLECTIONS.get(coll_key, coll_key)
129
+ try:
130
+ collections_to_query[coll_key] = client.get_collection(coll_name)
131
+ except Exception:
132
+ # Collection doesn't exist yet, skip
133
+ pass
134
+
135
+ if not collections_to_query:
136
+ # Fallback to legacy collection
137
+ try:
138
+ collections_to_query["legacy"] = client.get_collection("jasper_memory")
139
+ except Exception:
140
+ print("❌ No collections found. Run 'index-digests' first.", file=sys.stderr)
141
+ sys.exit(1)
55
142
 
56
143
  # Embed query
57
144
  query_embedding = model.encode([args.query])[0].tolist()
58
145
 
59
- # Search with optional public-only filter
60
- # Fetch extra results if filtering, since we'll post-filter
61
- fetch_limit = args.limit * 3 if args.public_only else args.limit
146
+ # Search each collection
147
+ all_results = {}
148
+ for coll_key, collection in collections_to_query.items():
149
+ results = search_collection(collection, query_embedding, args.limit * 2)
150
+ if results:
151
+ all_results[coll_key] = results
62
152
 
63
- query_params = {
64
- "query_embeddings": [query_embedding],
65
- "n_results": fetch_limit,
66
- "include": ["documents", "metadatas", "distances"]
67
- }
153
+ # Merge and limit results
154
+ merged = merge_results(all_results, args.limit)
68
155
 
69
- results = collection.query(**query_params)
70
-
71
- # Post-filter for public-only mode
72
- if args.public_only and results['documents'][0]:
73
- filtered_docs = []
74
- filtered_metas = []
75
- filtered_dists = []
76
-
77
- for doc, meta, dist in zip(
78
- results['documents'][0],
79
- results['metadatas'][0],
80
- results['distances'][0]
81
- ):
82
- source = meta.get('source', '')
83
- visibility = meta.get('visibility', '')
84
-
85
- # Check if source is in shared/ folder OR has public visibility
86
- is_shared = 'shared/' in source
87
- is_public = visibility == 'public'
88
-
89
- # Reject if content contains [private] tag
90
- has_private_tag = '[private]' in doc.lower()
91
-
92
- if (is_shared or is_public) and not has_private_tag:
93
- filtered_docs.append(doc)
94
- filtered_metas.append(meta)
95
- filtered_dists.append(dist)
96
-
97
- if len(filtered_docs) >= args.limit:
98
- break
99
-
100
- results['documents'][0] = filtered_docs
101
- results['metadatas'][0] = filtered_metas
102
- results['distances'][0] = filtered_dists
103
-
104
- if not results['documents'][0]:
156
+ if not merged:
105
157
  if args.json:
106
158
  print("[]")
107
159
  else:
@@ -110,33 +162,29 @@ def main():
110
162
 
111
163
  if args.json:
112
164
  output = []
113
- for i, (doc, meta, dist) in enumerate(zip(
114
- results['documents'][0],
115
- results['metadatas'][0],
116
- results['distances'][0]
117
- )):
165
+ for i, item in enumerate(merged):
118
166
  output.append({
119
167
  "rank": i + 1,
120
- "source": meta.get('source', 'unknown'),
121
- "similarity": round(1 - dist, 3), # Convert distance to similarity
122
- "content": doc
168
+ "collection": item["collection"],
169
+ "source": item["metadata"].get("source", "unknown"),
170
+ "similarity": round(item["similarity"], 3),
171
+ "content": item["document"]
123
172
  })
124
173
  print(json.dumps(output, indent=2))
125
174
  else:
126
- print(f"🔍 Results for: \"{args.query}\"\n")
175
+ searched = ", ".join(search_collections)
176
+ print(f"🔍 Results for: \"{args.query}\" (searched: {searched})\n")
127
177
 
128
- for i, (doc, meta, dist) in enumerate(zip(
129
- results['documents'][0],
130
- results['metadatas'][0],
131
- results['distances'][0]
132
- )):
133
- similarity = 1 - dist
178
+ for i, item in enumerate(merged):
179
+ similarity = item["similarity"]
134
180
  score_str = f" ({similarity:.1%})" if args.verbose else ""
135
- source = meta.get('source', 'unknown')
181
+ source = item["metadata"].get("source", "unknown")
182
+ coll_tag = f"[{item['collection']}] " if len(search_collections) > 1 else ""
136
183
 
137
- print(f"━━━ [{i+1}] {source}{score_str} ━━━")
184
+ print(f"━━━ [{i+1}] {coll_tag}{source}{score_str} ━━━")
138
185
  # Truncate long content
139
- content = doc[:500] + "..." if len(doc) > 500 else doc
186
+ content = item["document"]
187
+ content = content[:500] + "..." if len(content) > 500 else content
140
188
  print(content)
141
189
  print()
142
190
 
@@ -0,0 +1,176 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Write a learning to the agent_learnings collection.
4
+ Designed for sandboxed agents to contribute back to shared memory.
5
+
6
+ Usage:
7
+ write-learning "Brief title" "Learning content..."
8
+ write-learning --agent moltbook "Title" "Content"
9
+ write-learning --category engagement "Title" "Content"
10
+ write-learning --dry-run "Title" "Content"
11
+ """
12
+
13
+ import os
14
+ import sys
15
+ import argparse
16
+ import json
17
+ import hashlib
18
+ from datetime import datetime
19
+ from pathlib import Path
20
+
21
+ # Support custom paths via environment
22
+ WORKSPACE = os.environ.get("RECALL_WORKSPACE", os.path.expanduser("~/.openclaw/workspace"))
23
+ CHROMA_DIR = os.environ.get("RECALL_CHROMA_DB", os.path.expanduser("~/.openclaw/chroma-db"))
24
+ VENV_PATH = os.environ.get("RECALL_VENV", os.path.expanduser("~/.openclaw/rag-env"))
25
+
26
+ SHARED_DIR = os.path.join(WORKSPACE, "memory", "shared")
27
+ LEARNINGS_FILE = os.path.join(SHARED_DIR, "agent-learnings.md")
28
+
29
+ COLLECTION_LEARNINGS = "agent_learnings"
30
+
31
+ # Activate the venv
32
+ sys.path.insert(0, os.path.join(VENV_PATH, "lib/python3.12/site-packages"))
33
+ for pyver in ["python3.11", "python3.10"]:
34
+ alt_path = os.path.join(VENV_PATH, f"lib/{pyver}/site-packages")
35
+ if os.path.exists(alt_path):
36
+ sys.path.insert(0, alt_path)
37
+
38
+ try:
39
+ import chromadb
40
+ from sentence_transformers import SentenceTransformer
41
+ except ImportError as e:
42
+ print(f"❌ Missing dependency: {e}", file=sys.stderr)
43
+ print("Run 'npx jasper-recall setup' to install dependencies.", file=sys.stderr)
44
+ sys.exit(1)
45
+
46
+
47
def generate_id(title: str, agent: str, timestamp: str) -> str:
    """Generate a unique ID for the learning.

    The ID is the first 12 hex characters of the MD5 digest of
    ``"{agent}:{title}:{timestamp}"``.

    Args:
        title: Learning title.
        agent: Name of the contributing agent.
        timestamp: Timestamp string that makes repeated titles distinct.

    Returns:
        A 12-character lowercase hexadecimal string.
    """
    content = f"{agent}:{title}:{timestamp}"
    # MD5 is only used to derive a short identifier, not for security;
    # declaring that keeps the call working on FIPS-restricted Python builds.
    digest = hashlib.md5(content.encode(), usedforsecurity=False)
    return digest.hexdigest()[:12]
51
+
52
+
53
def append_to_learnings_file(title: str, content: str, agent: str, category: str, dry_run: bool = False):
    """Append learning to the markdown file for human readability.

    The entry is a ``## <date> [<category>] - <title>`` heading, an agent /
    timestamp line, and the content. The file (LEARNINGS_FILE) is created with
    a short header the first time it is written.

    Args:
        title: Heading text; internal whitespace and line breaks are collapsed
            so the heading always stays on one line.
        content: Body text of the learning.
        agent: Name of the contributing agent.
        category: Category tag placed in the heading.
        dry_run: When True, only print the entry. Nothing is created or
            written, including the target directory.
    """
    # One clock reading so the date and the timestamp can never disagree.
    now = datetime.now()
    timestamp = now.strftime("%Y-%m-%d %H:%M")
    date = now.strftime("%Y-%m-%d")

    # A line break inside the title would split the markdown heading.
    heading_title = " ".join(title.split())

    entry = f"\n## {date} [{category}] - {heading_title}\n"
    entry += f"*Agent: {agent} | {timestamp}*\n\n"
    entry += f"{content}\n"

    if dry_run:
        print("\n📄 Would append to agent-learnings.md:")
        print("-" * 40)
        print(entry)
        return

    # Only touch the filesystem once we know this is not a preview.
    os.makedirs(os.path.dirname(LEARNINGS_FILE), exist_ok=True)

    # Create file with header if it doesn't exist
    needs_header = not os.path.exists(LEARNINGS_FILE)

    # Explicit UTF-8: the entry may contain non-ASCII text regardless of the
    # platform's default encoding.
    with open(LEARNINGS_FILE, 'a', encoding='utf-8') as f:
        if needs_header:
            f.write("# Agent Learnings\n\n")
            f.write("Insights and learnings contributed by sandboxed agents.\n\n")
            f.write("---\n")
        f.write(entry)

    print(f"📄 Added to {os.path.relpath(LEARNINGS_FILE, WORKSPACE)}")
82
+
83
+
84
+ def index_to_chromadb(title: str, content: str, agent: str, category: str, dry_run: bool = False):
85
+ """Index the learning directly to ChromaDB.
+
+ Opens (creating if needed) the persistent Chroma store at CHROMA_DIR,
+ gets or creates the COLLECTION_LEARNINGS collection, embeds
+ "<title>\n\n<content>" with the 'all-MiniLM-L6-v2' sentence-transformers
+ model and adds it as a single document.
+
+ Args:
+ title: Learning title; stored in metadata and prepended to the text.
+ content: Learning body.
+ agent: Agent name; stored in metadata and used in the "source" value.
+ category: Category label; stored in metadata.
+ dry_run: When True, only print what would happen and return.
+ """
86
+ if dry_run:
87
+ print("\n🗄️ Would index to agent_learnings collection")
88
+ return
89
+
90
+ # Initialize
91
+ os.makedirs(CHROMA_DIR, exist_ok=True)
92
+ client = chromadb.PersistentClient(path=CHROMA_DIR)
93
+
94
+ collection = client.get_or_create_collection(
95
+ name=COLLECTION_LEARNINGS,
96
+ metadata={"description": "Learnings written by sandboxed agents"}
97
+ )
98
+
99
+ # Load model
100
+ model = SentenceTransformer('all-MiniLM-L6-v2')
101
+
102
+ # Prepare document
103
+ timestamp = datetime.now().isoformat()  # also feeds the ID, so repeated titles get distinct IDs
104
+ doc_id = generate_id(title, agent, timestamp)
105
+
106
+ # Combine title and content for embedding
107
+ full_text = f"{title}\n\n{content}"
108
+ embedding = model.encode([full_text])[0].tolist()  # encode a one-item batch, keep its only vector
109
+
110
+ metadata = {
111
+ "source": f"agent-learnings/{agent}/{doc_id}",
112
+ "filename": "agent-learnings.md",
113
+ "agent": agent,
114
+ "category": category,
115
+ "title": title,
116
+ "timestamp": timestamp,
117
+ }
118
+
119
+ # Add to collection
120
+ collection.add(
121
+ ids=[doc_id],
122
+ embeddings=[embedding],
123
+ documents=[full_text],
124
+ metadatas=[metadata]
125
+ )
126
+
127
+ print(f"🗄️ Indexed to {COLLECTION_LEARNINGS} (id: {doc_id})")
128
+
129
+
130
def main():
    """Command-line entry point: record a learning in the file and in ChromaDB.

    Parses the arguments, validates title/content length (exit status 1 on
    violation), then calls append_to_learnings_file() and index_to_chromadb().

    With --json, standard output carries only the final JSON document; all
    human-readable progress messages are sent to standard error so the output
    can be parsed by a calling program.
    """
    import contextlib  # local import: only this entry point needs it

    parser = argparse.ArgumentParser(description="Write a learning to shared memory")
    parser.add_argument("title", help="Brief title for the learning")
    parser.add_argument("content", help="Learning content/description")
    parser.add_argument("--agent", default="unknown", help="Agent name (e.g., moltbook, coder)")
    parser.add_argument("--category", default="insight",
                        choices=["insight", "engagement", "pattern", "bug", "success", "failure"],
                        help="Category of learning")
    parser.add_argument("--dry-run", action="store_true", help="Preview without writing")
    parser.add_argument("--json", action="store_true", help="Output as JSON")

    args = parser.parse_args()

    # Validate inputs
    if len(args.title) > 200:
        print("❌ Title too long (max 200 chars)", file=sys.stderr)
        sys.exit(1)

    if len(args.content) > 5000:
        print("❌ Content too long (max 5000 chars)", file=sys.stderr)
        sys.exit(1)

    # Keep stdout clean for the JSON document when --json is requested.
    progress = sys.stderr if args.json else sys.stdout

    # The helpers print their own status lines, so redirect rather than
    # threading a stream through every call.
    with contextlib.redirect_stdout(progress):
        print(f"📝 Writing learning from agent '{args.agent}'...")
        print(f" Category: {args.category}")
        print(f" Title: {args.title}")

        if args.dry_run:
            print("\n(DRY RUN - no changes will be made)")

        # Write to both file and ChromaDB
        append_to_learnings_file(args.title, args.content, args.agent, args.category, args.dry_run)
        index_to_chromadb(args.title, args.content, args.agent, args.category, args.dry_run)

        if not args.dry_run:
            print("\n✅ Learning saved!")

    if args.json:
        print(json.dumps({
            "success": True,
            "title": args.title,
            "agent": args.agent,
            "category": args.category,
            # Lets callers tell a preview apart from a real write.
            "dry_run": args.dry_run
        }))
173
+
174
+
175
+ if __name__ == "__main__":
176
+ main()