cozo-memory 1.1.2 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -176,88 +176,116 @@ class InferenceEngine {
176
176
  * @param minSimilarity Minimum similarity for semantic jumps (0.0 - 1.0, Default: 0.7)
177
177
  */
178
178
  async semanticGraphWalk(startEntityId, maxDepth = 3, minSimilarity = 0.7) {
179
- try {
180
- // Get embedding of the start entity for the first semantic jump
181
- const entityRes = await this.db.run('?[embedding] := *entity{id: $id, embedding, @ "NOW"}', { id: startEntityId });
182
- if (entityRes.rows.length === 0)
179
+ // Limit max_depth to 2 to prevent database lock issues with complex queries
180
+ const safeMaxDepth = Math.min(maxDepth, 2);
181
+ if (maxDepth > 2) {
182
+ console.error(`[SemanticWalk] Limiting max_depth from ${maxDepth} to 2 to prevent database locks`);
183
+ }
184
+ // Retry logic with exponential backoff for database lock errors
185
+ const maxRetries = 3;
186
+ const baseDelay = 100; // ms
187
+ const timeout = 30000; // 30 second timeout
188
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
189
+ try {
190
+ // Wrap in timeout promise
191
+ const result = await Promise.race([
192
+ this._executeSemanticWalk(startEntityId, safeMaxDepth, minSimilarity),
193
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Semantic walk timeout')), timeout))
194
+ ]);
195
+ return result;
196
+ }
197
+ catch (e) {
198
+ const isLockError = e.message?.includes('database is locked') || e.message?.includes('code 5');
199
+ const isLastAttempt = attempt === maxRetries - 1;
200
+ if (isLockError && !isLastAttempt) {
201
+ const delay = baseDelay * Math.pow(2, attempt);
202
+ console.error(`[SemanticWalk] Database locked (attempt ${attempt + 1}/${maxRetries}), retrying in ${delay}ms...`);
203
+ await new Promise(resolve => setTimeout(resolve, delay));
204
+ continue;
205
+ }
206
+ console.error(`[SemanticWalk] Failed after ${attempt + 1} attempts:`, e.message);
183
207
  return [];
184
- const startEmbedding = entityRes.rows[0][0];
185
- // Recursive Datalog query
186
- // We avoid complex aggregation of strings in Datalog as this can cause errors.
187
- // Instead, we implicitly group by 'type' as well and filter later in JS.
188
- const query = `
189
- # 1. Start point
190
- path[id, depth, score, type] :=
191
- id = $startId,
192
- depth = 0,
193
- score = 1.0,
194
- type = 'start'
208
+ }
209
+ }
210
+ return [];
211
+ }
212
+ async _executeSemanticWalk(startEntityId, maxDepth, minSimilarity) {
213
+ // Get embedding of the start entity for the first semantic jump
214
+ // Optimized: Remove @ "NOW" validity check for better performance
215
+ const entityRes = await this.db.run('?[embedding] := *entity{id: $id, embedding}', { id: startEntityId });
216
+ if (entityRes.rows.length === 0)
217
+ return [];
218
+ const startEmbedding = entityRes.rows[0][0];
219
+ // Recursive Datalog query - Optimized for performance
220
+ // Removed @ "NOW" validity checks to reduce lock contention
221
+ const query = `
222
+ # 1. Start point
223
+ path[id, depth, score, type] :=
224
+ id = $startId,
225
+ depth = 0,
226
+ score = 1.0,
227
+ type = 'start'
195
228
 
196
- # 2. Recursion: Follow explicit relations
197
- path[next_id, new_depth, new_score, new_type] :=
198
- path[curr_id, depth, score, curr_type],
199
- depth < $maxDepth,
200
- *relationship{from_id: curr_id, to_id: next_id, relation_type, strength, @ "NOW"},
201
- new_depth = depth + 1,
202
- new_score = score * strength,
203
- new_type = if(curr_type == 'start', 'explicit', if(curr_type == 'explicit', 'explicit', 'mixed'))
229
+ # 2. Recursion: Follow explicit relations (optimized - no validity check)
230
+ path[next_id, new_depth, new_score, new_type] :=
231
+ path[curr_id, depth, score, curr_type],
232
+ depth < $maxDepth,
233
+ *relationship{from_id: curr_id, to_id: next_id, relation_type, strength},
234
+ new_depth = depth + 1,
235
+ new_score = score * strength,
236
+ new_type = if(curr_type == 'start', 'explicit', if(curr_type == 'explicit', 'explicit', 'mixed'))
204
237
 
205
- # 3. Recursion: Follow semantic similarity (via HNSW Index)
206
- path[next_id, new_depth, new_score, new_type] :=
207
- path[curr_id, depth, score, curr_type],
208
- depth < $maxDepth,
209
- *entity{id: curr_id, embedding: curr_emb, @ "NOW"}, # Load embedding
210
- # Search for the K nearest neighbors to the current embedding
211
- ~entity:semantic { id: next_id |
212
- query: curr_emb,
213
- k: 5,
214
- ef: 20,
215
- bind_distance: dist
216
- },
217
- next_id != curr_id, # No self-reference
218
- sim = 1.0 - dist,
219
- sim >= $minSim,
220
- new_depth = depth + 1,
221
- new_score = score * sim * 0.8, # Penalize semantic jumps slightly (damping)
222
- new_type = if(curr_type == 'start', 'semantic', if(curr_type == 'semantic', 'semantic', 'mixed'))
238
+ # 3. Recursion: Follow semantic similarity (optimized - no validity check)
239
+ path[next_id, new_depth, new_score, new_type] :=
240
+ path[curr_id, depth, score, curr_type],
241
+ depth < $maxDepth,
242
+ *entity{id: curr_id, embedding: curr_emb},
243
+ # Search for the K nearest neighbors to the current embedding
244
+ ~entity:semantic { id: next_id |
245
+ query: curr_emb,
246
+ k: 5,
247
+ ef: 20,
248
+ bind_distance: dist
249
+ },
250
+ next_id != curr_id,
251
+ sim = 1.0 - dist,
252
+ sim >= $minSim,
253
+ new_depth = depth + 1,
254
+ new_score = score * sim * 0.8,
255
+ new_type = if(curr_type == 'start', 'semantic', if(curr_type == 'semantic', 'semantic', 'mixed'))
223
256
 
224
- # Aggregate result (Grouping by ID and Type)
225
- ?[id, min_depth, max_score, type] :=
226
- path[id, d, s, type],
227
- id != $startId,
228
- min_depth = min(d),
229
- max_score = max(s)
230
- :limit 100
231
- `;
232
- const res = await this.db.run(query, {
233
- startId: startEntityId,
234
- maxDepth: maxDepth,
235
- minSim: minSimilarity
236
- });
237
- // Post-processing in JS: Select best path type per ID
238
- const bestPaths = new Map();
239
- for (const row of res.rows) {
240
- const [id, depth, score, type] = row;
241
- // Cozo sometimes returns arrays or raw values, ensure we have Strings/Numbers
242
- const cleanId = String(id);
243
- const cleanDepth = Number(depth);
244
- const cleanScore = Number(score);
245
- const cleanType = String(type);
246
- if (!bestPaths.has(cleanId) || cleanScore > bestPaths.get(cleanId).path_score) {
247
- bestPaths.set(cleanId, {
248
- entity_id: cleanId,
249
- distance: cleanDepth,
250
- path_score: cleanScore,
251
- path_type: cleanType
252
- });
253
- }
257
+ # Aggregate result (Grouping by ID and Type)
258
+ ?[id, min_depth, max_score, type] :=
259
+ path[id, d, s, type],
260
+ id != $startId,
261
+ min_depth = min(d),
262
+ max_score = max(s)
263
+ :limit 100
264
+ `;
265
+ const res = await this.db.run(query, {
266
+ startId: startEntityId,
267
+ maxDepth: maxDepth,
268
+ minSim: minSimilarity
269
+ });
270
+ // Post-processing in JS: Select best path type per ID
271
+ const bestPaths = new Map();
272
+ for (const row of res.rows) {
273
+ const [id, depth, score, type] = row;
274
+ // Cozo sometimes returns arrays or raw values, ensure we have Strings/Numbers
275
+ const cleanId = String(id);
276
+ const cleanDepth = Number(depth);
277
+ const cleanScore = Number(score);
278
+ const cleanType = String(type);
279
+ if (!bestPaths.has(cleanId) || cleanScore > bestPaths.get(cleanId).path_score) {
280
+ bestPaths.set(cleanId, {
281
+ entity_id: cleanId,
282
+ distance: cleanDepth,
283
+ path_score: cleanScore,
284
+ path_type: cleanType
285
+ });
254
286
  }
255
- return Array.from(bestPaths.values());
256
- }
257
- catch (e) {
258
- console.error("Semantic Graph Walk Failed:", e.message);
259
- return [];
260
287
  }
288
+ return Array.from(bestPaths.values());
261
289
  }
262
290
  /**
263
291
  * Analyzes the cluster structure directly on the HNSW graph (Layer 0).
@@ -0,0 +1,316 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.LogicalEdgesService = void 0;
4
/**
 * Logical Edges Service (v1.0)
 *
 * Discovers implicit relationships from entity metadata using logical inference rules.
 *
 * Research Foundation:
 * - SAGE (ICLR 2026): Implicit graph exploration with on-demand edge discovery
 * - Metadata Knowledge Graphs (Atlan 2026): Metadata-driven relationship inference
 * - Knowledge Graph Completion (Frontiers 2025): Predicting implicit relationships
 *
 * Patterns:
 * 1. **Metadata-Based Edges**: Same category, type, domain, etc.
 * 2. **Semantic Edges**: Entities with similar metadata patterns
 * 3. **Hierarchical Edges**: Parent-child relationships from metadata
 * 4. **Contextual Edges**: Time-based, location-based, or domain-based grouping
 * 5. **Transitive Edges**: Derived from existing relationships + metadata
 *
 * Every edge produced by this class is a plain object of the shape
 * `{ from_id, to_id, relation_type, confidence, reason, pattern }`.
 * The `find*` methods never reject: a failing query is logged to stderr and
 * yields an empty array.
 */
class LogicalEdgesService {
    db;
    /**
     * @param db Database handle; only `db.run(query, params)` resolving to
     *           an object with a `rows` array is used by this class.
     */
    constructor(db) {
        this.db = db;
    }
    /**
     * Best-effort, never-throwing description of a caught value.
     * Returns `error.message` when it is a string, otherwise `String(error)`,
     * so `throw null` or `throw "text"` cannot crash a catch handler.
     */
    static _errorText(error) {
        const message = error?.message;
        return typeof message === "string" ? message : String(error);
    }
    /**
     * Shared runner for the five discovery patterns: executes `query` with
     * `$entity_id` bound to `entityId` and converts each row into an edge.
     *
     * @param label Text used in the log line `[LogicalEdges] <label> error:`
     * @param query CozoScript whose result has the target id in column 0
     * @param entityId Entity the edges originate from
     * @param shape `{ relationType, pattern, confidenceCol, reasonCol }` -
     *              constant edge fields plus the result columns to read
     * @returns Array of edges; empty when the query fails
     */
    async _collectEdges(label, query, entityId, { relationType, pattern, confidenceCol, reasonCol }) {
        try {
            const result = await this.db.run(query, { entity_id: entityId });
            return result.rows.map((row) => ({
                from_id: entityId,
                to_id: row[0],
                relation_type: relationType,
                confidence: row[confidenceCol],
                reason: row[reasonCol],
                pattern
            }));
        } catch (error) {
            console.error(`[LogicalEdges] ${label} error:`, LogicalEdgesService._errorText(error));
            return [];
        }
    }
    /**
     * Discover all logical edges for an entity based on metadata patterns
     *
     * Runs the five pattern queries one after another (not in parallel) and
     * returns their combined, de-duplicated result.
     */
    async discoverLogicalEdges(entityId) {
        const edges = [];
        // Appending element-wise avoids `push(...hugeArray)`, which throws a
        // RangeError once a result set exceeds the engine's argument limit.
        const collect = (found) => {
            for (const edge of found) {
                edges.push(edge);
            }
        };
        // 1. Same Category Edges
        collect(await this.findSameCategoryEdges(entityId));
        // 2. Same Type Edges
        collect(await this.findSameTypeEdges(entityId));
        // 3. Hierarchical Edges (parent-child from metadata)
        collect(await this.findHierarchicalEdges(entityId));
        // 4. Contextual Edges (domain, time period, location)
        collect(await this.findContextualEdges(entityId));
        // 5. Transitive Edges (derived from relationships + metadata)
        collect(await this.findTransitiveLogicalEdges(entityId));
        // Deduplicate and return
        return this.deduplicateEdges(edges);
    }
    /**
     * Pattern 1: Same Category Edges
     *
     * Entities with the same category metadata are logically related
     * Example: All "Machine Learning" papers are related
     */
    async findSameCategoryEdges(entityId) {
        const query = `
            # Get the category of the target entity
            source_category[category] :=
                *entity{id: $entity_id, metadata, @ "NOW"},
                category = get(metadata, 'category')

            # Find all entities with the same category
            ?[other_id, other_name, other_type, confidence, reason] :=
                source_category[category],
                category != null,
                *entity{id: other_id, name: other_name, type: other_type, metadata, @ "NOW"},
                other_id != $entity_id,
                get(metadata, 'category') == category,
                confidence = 0.8,
                reason = concat('Same category: ', category)
        `;
        // This query returns one extra column (other_type), so confidence
        // and reason sit one position later than in the other patterns.
        return this._collectEdges("Same category", query, entityId, {
            relationType: "same_category",
            pattern: "metadata_category",
            confidenceCol: 3,
            reasonCol: 4
        });
    }
    /**
     * Pattern 2: Same Type Edges
     *
     * Entities of the same type are logically related
     * Example: All "Person" entities, all "Project" entities
     */
    async findSameTypeEdges(entityId) {
        const query = `
            # Get the type of the target entity
            source_type[entity_type] :=
                *entity{id: $entity_id, type: entity_type, @ "NOW"}

            # Find all entities with the same type
            ?[other_id, other_name, confidence, reason] :=
                source_type[entity_type],
                *entity{id: other_id, name: other_name, type: entity_type, @ "NOW"},
                other_id != $entity_id,
                confidence = 0.7,
                reason = concat('Same type: ', entity_type)
        `;
        return this._collectEdges("Same type", query, entityId, {
            relationType: "same_type",
            pattern: "metadata_type",
            confidenceCol: 2,
            reasonCol: 3
        });
    }
    /**
     * Pattern 3: Hierarchical Edges
     *
     * Parent-child relationships derived from metadata hierarchy
     * Example: "parent_id" in metadata indicates parent entity
     *
     * Both directions are reported with `from_id` set to `entityId`; the
     * `reason` text is the only field that tells parent from child.
     */
    async findHierarchicalEdges(entityId) {
        const query = `
            # Get parent_id from metadata
            source_parent[parent_id] :=
                *entity{id: $entity_id, metadata, @ "NOW"},
                parent_id = get(metadata, 'parent_id'),
                parent_id != null

            # Find parent entity
            ?[parent_id, parent_name, confidence, reason] :=
                source_parent[parent_id],
                *entity{id: parent_id, name: parent_name, @ "NOW"},
                confidence = 0.9,
                reason = 'Parent relationship from metadata'

            # Also find children (reverse direction)
            ?[child_id, child_name, confidence, reason] :=
                *entity{id: child_id, metadata, @ "NOW"},
                get(metadata, 'parent_id') == $entity_id,
                *entity{id: child_id, name: child_name, @ "NOW"},
                confidence = 0.9,
                reason = 'Child relationship from metadata'
        `;
        return this._collectEdges("Hierarchical", query, entityId, {
            relationType: "hierarchical",
            pattern: "metadata_hierarchy",
            confidenceCol: 2,
            reasonCol: 3
        });
    }
    /**
     * Pattern 4: Contextual Edges
     *
     * Entities sharing context (domain, time period, location, organization)
     * Example: All papers from 2025, all entities in "AI" domain
     *
     * Simplified implementation: only the `domain` metadata key is matched.
     */
    async findContextualEdges(entityId) {
        const query = `
            # Get domain from metadata
            source_domain[domain] :=
                *entity{id: $entity_id, metadata, @ "NOW"},
                domain = get(metadata, 'domain'),
                domain != null

            # Find entities with matching domain
            ?[other_id, other_name, confidence, reason] :=
                source_domain[domain],
                *entity{id: other_id, name: other_name, metadata, @ "NOW"},
                other_id != $entity_id,
                get(metadata, 'domain') == domain,
                confidence = 0.75,
                reason = concat('Same domain: ', domain)
        `;
        return this._collectEdges("Contextual", query, entityId, {
            relationType: "contextual",
            pattern: "metadata_context",
            confidenceCol: 2,
            reasonCol: 3
        });
    }
    /**
     * Pattern 5: Transitive Logical Edges
     *
     * Derived from existing relationships combined with metadata patterns
     * Example: If A -> B (explicit) and B has same category as C, then A -> C (transitive)
     */
    async findTransitiveLogicalEdges(entityId) {
        const query = `
            # Get entities connected via explicit relationships
            connected[mid_id] :=
                *relationship{from_id: $entity_id, to_id: mid_id, @ "NOW"}

            # Get metadata of connected entities
            connected_metadata[mid_id, mid_category, mid_type] :=
                connected[mid_id],
                *entity{id: mid_id, type: mid_type, metadata, @ "NOW"},
                mid_category = get(metadata, 'category')

            # Find entities with same category as connected entities
            ?[other_id, other_name, confidence, reason] :=
                connected_metadata[mid_id, category, _],
                category != null,
                *entity{id: other_id, name: other_name, metadata, @ "NOW"},
                other_id != $entity_id,
                other_id != mid_id,
                get(metadata, 'category') == category,
                confidence = 0.6,
                reason = concat('Transitive via category match through ', mid_id)

            # Find entities with same type as connected entities
            ?[other_id, other_name, confidence, reason] :=
                connected_metadata[mid_id, _, entity_type],
                *entity{id: other_id, name: other_name, type: entity_type, @ "NOW"},
                other_id != $entity_id,
                other_id != mid_id,
                confidence = 0.55,
                reason = concat('Transitive via type match through ', mid_id)
        `;
        return this._collectEdges("Transitive", query, entityId, {
            relationType: "transitive_logical",
            pattern: "metadata_transitive",
            confidenceCol: 2,
            reasonCol: 3
        });
    }
    /**
     * Deduplicate edges by (from_id, to_id, relation_type)
     * Keep the one with highest confidence
     *
     * On equal confidence the edge seen first is kept.
     */
    deduplicateEdges(edges) {
        const strongest = new Map();
        for (const edge of edges) {
            // A JSON-encoded tuple cannot collide the way a delimiter-joined
            // string can ("a|b" + "c" versus "a" + "b|c"). Parts are coerced
            // with String() so ids keep comparing by their text form.
            const key = JSON.stringify([
                String(edge.from_id),
                String(edge.to_id),
                String(edge.relation_type)
            ]);
            const existing = strongest.get(key);
            if (!existing || edge.confidence > existing.confidence) {
                strongest.set(key, edge);
            }
        }
        return Array.from(strongest.values());
    }
    /**
     * Create logical edges as explicit relationships in the database
     * (Optional: for performance optimization)
     *
     * Inserts run one at a time; a failed insert is skipped and does not
     * abort the remaining ones.
     *
     * NOTE(review): `created_at` is listed among the key columns of the
     * insert and receives a fresh timestamp per row, so the "duplicate"
     * filter below may never match on repeated runs - confirm against the
     * `relationship` schema.
     *
     * @returns Number of edges whose insert succeeded; 0 if discovery failed
     */
    async materializeLogicalEdges(entityId) {
        try {
            const edges = await this.discoverLogicalEdges(entityId);
            let created = 0;
            for (const edge of edges) {
                try {
                    // Date.now() is in milliseconds; scaled by 1000 for the
                    // [timestamp, true] pair stored in created_at.
                    const now = Date.now() * 1000;
                    await this.db.run(`?[from_id, to_id, relation_type, created_at, strength, metadata] <- [
                        [$from_id, $to_id, $rel_type, [${now}, true], $strength, $metadata]
                    ] :insert relationship {from_id, to_id, relation_type, created_at => strength, metadata}`, {
                        from_id: edge.from_id,
                        to_id: edge.to_id,
                        rel_type: edge.relation_type,
                        strength: edge.confidence,
                        metadata: { logical_edge: true, pattern: edge.pattern, reason: edge.reason }
                    });
                    created++;
                } catch (e) {
                    // Ignore duplicate key errors
                    const message = LogicalEdgesService._errorText(e);
                    if (!message.includes("duplicate")) {
                        console.error("[LogicalEdges] Materialization error:", message);
                    }
                }
            }
            return created;
        } catch (error) {
            console.error("[LogicalEdges] Materialization failed:", LogicalEdgesService._errorText(error));
            return 0;
        }
    }
}
316
+ exports.LogicalEdgesService = LogicalEdgesService;