cozo-memory 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -12,12 +12,12 @@ const cozo_node_1 = require("cozo-node");
12
12
  const zod_1 = require("zod");
13
13
  const uuid_1 = require("uuid");
14
14
  const path_1 = __importDefault(require("path"));
15
+ const fs_1 = __importDefault(require("fs"));
16
+ const pdf_mjs_1 = require("pdfjs-dist/legacy/build/pdf.mjs");
15
17
  const hybrid_search_1 = require("./hybrid-search");
16
18
  const inference_engine_1 = require("./inference-engine");
17
19
  exports.DB_PATH = path_1.default.resolve(__dirname, "..", "memory_db.cozo");
18
20
  const DB_ENGINE = process.env.DB_ENGINE || "sqlite"; // "sqlite" or "rocksdb"
19
- const EMBEDDING_MODEL = "Xenova/bge-m3";
20
- const EMBEDDING_DIM = 1024;
21
21
  exports.USER_ENTITY_ID = "global_user_profile";
22
22
  exports.USER_ENTITY_NAME = "The User";
23
23
  exports.USER_ENTITY_TYPE = "User";
@@ -484,6 +484,9 @@ class MemoryServer {
484
484
  async setupSchema() {
485
485
  try {
486
486
  console.error("[Schema] Initializing schema...");
487
+ // Get embedding dimensions from service
488
+ const EMBEDDING_DIM = this.embeddingService.getDimensions();
489
+ console.error(`[Schema] Using embedding dimensions: ${EMBEDDING_DIM}`);
487
490
  const existingRelations = await this.db.run("::relations");
488
491
  const relations = existingRelations.rows.map((r) => r[0]);
489
492
  // Entity Table
@@ -1029,7 +1032,8 @@ class MemoryServer {
1029
1032
  [$id, [${now}, true], $name, $type, $embedding, $name_embedding, $metadata]
1030
1033
  ] :insert entity {id, created_at => name, type, embedding, name_embedding, metadata}
1031
1034
  `, { id, name, type, embedding, name_embedding, metadata: metadata || {} });
1032
- return { id, name, type, status: "Entity created" };
1035
+ const created_at_iso = new Date(Math.floor(now / 1000)).toISOString();
1036
+ return { id, name, type, created_at: now, created_at_iso, status: "Entity created" };
1033
1037
  }
1034
1038
  async initUserProfile() {
1035
1039
  try {
@@ -1188,9 +1192,12 @@ class MemoryServer {
1188
1192
  // Optional: Automatic inference after new observation (in background)
1189
1193
  const suggestionsRaw = await this.inferenceEngine.inferRelations(entityId);
1190
1194
  const suggestions = await this.formatInferredRelationsForContext(suggestionsRaw);
1195
+ const created_at_iso = new Date(Math.floor(now / 1000)).toISOString();
1191
1196
  return {
1192
1197
  id,
1193
1198
  entity_id: entityId,
1199
+ created_at: now,
1200
+ created_at_iso,
1194
1201
  status: "Observation saved",
1195
1202
  inferred_suggestions: suggestions
1196
1203
  };
@@ -1244,7 +1251,15 @@ class MemoryServer {
1244
1251
  strength: args.strength ?? 1.0,
1245
1252
  metadata: args.metadata || {}
1246
1253
  });
1247
- return { status: "Relationship created" };
1254
+ const created_at_iso = new Date(Math.floor(now / 1000)).toISOString();
1255
+ return {
1256
+ from_id: args.from_id,
1257
+ to_id: args.to_id,
1258
+ relation_type: args.relation_type,
1259
+ created_at: now,
1260
+ created_at_iso,
1261
+ status: "Relationship created"
1262
+ };
1248
1263
  }
1249
1264
  async exploreGraph(args) {
1250
1265
  await this.initPromise;
@@ -1664,7 +1679,39 @@ ids[id] <- $ids
1664
1679
  async ingestFile(args) {
1665
1680
  await this.initPromise;
1666
1681
  try {
1667
- const content = (args.content ?? "").trim();
1682
+ // Check that either file_path or content is provided
1683
+ if (!args.file_path && !args.content) {
1684
+ return { error: "Either file_path or content must be provided" };
1685
+ }
1686
+ // Read content from file if file_path is provided
1687
+ let content;
1688
+ if (args.file_path) {
1689
+ try {
1690
+ if (args.format === "pdf") {
1691
+ // Read PDF file and extract text using pdfjs-dist
1692
+ const data = new Uint8Array(fs_1.default.readFileSync(args.file_path));
1693
+ const loadingTask = (0, pdf_mjs_1.getDocument)({ data });
1694
+ const pdf = await loadingTask.promise;
1695
+ const numPages = pdf.numPages;
1696
+ const pageTextPromises = Array.from({ length: numPages }, async (_, i) => {
1697
+ const page = await pdf.getPage(i + 1);
1698
+ const textContent = await page.getTextContent();
1699
+ return textContent.items.map((item) => item.str).join(' ');
1700
+ });
1701
+ const pageTexts = await Promise.all(pageTextPromises);
1702
+ content = pageTexts.join('\n').trim();
1703
+ }
1704
+ else {
1705
+ content = fs_1.default.readFileSync(args.file_path, 'utf-8').trim();
1706
+ }
1707
+ }
1708
+ catch (error) {
1709
+ return { error: `Failed to read file: ${error.message}` };
1710
+ }
1711
+ }
1712
+ else {
1713
+ content = (args.content ?? "").trim();
1714
+ }
1668
1715
  if (!content)
1669
1716
  return { error: "Content must not be empty" };
1670
1717
  let entityId = undefined;
@@ -1701,7 +1748,7 @@ ids[id] <- $ids
1701
1748
  const deduplicate = args.deduplicate ?? true;
1702
1749
  const chunking = args.chunking ?? "none";
1703
1750
  const observations = [];
1704
- if (args.format === "markdown") {
1751
+ if (args.format === "markdown" || args.format === "pdf") {
1705
1752
  if (chunking === "paragraphs") {
1706
1753
  const parts = content
1707
1754
  .split(/\r?\n\s*\r?\n+/g)
@@ -2230,9 +2277,10 @@ ids[id] <- $ids
2230
2277
  entity_id: zod_1.z.string().optional().describe("ID of the target entity"),
2231
2278
  entity_name: zod_1.z.string().optional().describe("Name of the target entity (will be created if not exists)"),
2232
2279
  entity_type: zod_1.z.string().optional().default("Document").describe("Type of the target entity (only when creating)"),
2233
- format: zod_1.z.enum(["markdown", "json"]).describe("Input format"),
2280
+ format: zod_1.z.enum(["markdown", "json", "pdf"]).describe("Input format"),
2234
2281
  chunking: zod_1.z.enum(["none", "paragraphs"]).optional().default("none").describe("Chunking for Markdown"),
2235
- content: zod_1.z.string().describe("File content (or LLM summary)"),
2282
+ file_path: zod_1.z.string().optional().describe("Path to file on disk (alternative to content parameter)"),
2283
+ content: zod_1.z.string().optional().describe("File content (or LLM summary) - required if file_path not provided"),
2236
2284
  metadata: MetadataSchema.optional().describe("Metadata for entity creation"),
2237
2285
  observation_metadata: MetadataSchema.optional().describe("Metadata applied to all observations"),
2238
2286
  deduplicate: zod_1.z.boolean().optional().default(true).describe("Skip exact duplicates"),
@@ -2240,6 +2288,9 @@ ids[id] <- $ids
2240
2288
  }).refine((v) => Boolean(v.entity_id) || Boolean(v.entity_name), {
2241
2289
  message: "entity_id or entity_name is required for ingest_file",
2242
2290
  path: ["entity_id"],
2291
+ }).refine((v) => Boolean(v.file_path) || Boolean(v.content), {
2292
+ message: "file_path or content is required for ingest_file",
2293
+ path: ["file_path"],
2243
2294
  }),
2244
2295
  ]);
2245
2296
  const MutateMemoryParameters = zod_1.z.object({
@@ -2254,9 +2305,10 @@ ids[id] <- $ids
2254
2305
  entity_type: zod_1.z.string().optional().describe("Only when entity_name is used and entity is created new"),
2255
2306
  text: zod_1.z.string().optional().describe("For add_observation (required)"),
2256
2307
  datalog: zod_1.z.string().optional().describe("For add_inference_rule (required)"),
2257
- format: zod_1.z.enum(["markdown", "json"]).optional().describe("For ingest_file (required)"),
2308
+ format: zod_1.z.enum(["markdown", "json", "pdf"]).optional().describe("For ingest_file (required)"),
2258
2309
  chunking: zod_1.z.enum(["none", "paragraphs"]).optional().describe("Optional for ingest_file (for markdown)"),
2259
- content: zod_1.z.string().optional().describe("For ingest_file (required)"),
2310
+ file_path: zod_1.z.string().optional().describe("For ingest_file - path to file on disk (alternative to content)"),
2311
+ content: zod_1.z.string().optional().describe("For ingest_file - file content (required if file_path not provided)"),
2260
2312
  observation_metadata: MetadataSchema.optional().describe("Optional for ingest_file"),
2261
2313
  deduplicate: zod_1.z.boolean().optional().describe("Optional for ingest_file and add_observation"),
2262
2314
  max_observations: zod_1.z.number().optional().describe("Optional for ingest_file"),
@@ -1,7 +1,42 @@
1
1
  "use strict";
2
// Compiler-emitted re-export helper: exposes property `k` of module object `m`
// on `o` under the name `k2` (which defaults to `k`). An implementation already
// present on `this` takes precedence over the one defined here.
var __createBinding = (this && this.__createBinding) || (function () {
    // Without Object.create, fall back to copying the current value once.
    if (!Object.create) {
        return function (target, source, key, alias) {
            if (alias === undefined) {
                alias = key;
            }
            target[alias] = source[key];
        };
    }
    return function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        let descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Decide whether the source descriptor can be reused as-is or must be
        // replaced by a forwarding getter.
        let useForwardingGetter;
        if (!descriptor) {
            useForwardingGetter = true;
        }
        else if ("get" in descriptor) {
            useForwardingGetter = !source.__esModule;
        }
        else {
            useForwardingGetter = descriptor.writable || descriptor.configurable;
        }
        if (useForwardingGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    };
})();
13
// Compiler-emitted helper: stores `v` as the "default" member of `o`.
// An implementation already present on `this` takes precedence.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        // Define "default" as an enumerable data property via a descriptor.
        return function (target, value) {
            Object.defineProperty(target, "default", { enumerable: true, value: value });
        };
    }
    // Legacy fallback: plain assignment.
    return function (target, value) {
        target["default"] = value;
    };
})();
18
// Compiler-emitted namespace-import helper. Objects flagged `__esModule` are
// returned untouched; anything else is wrapped in a fresh object that exposes
// every own key except "default" via __createBinding and carries the original
// value as its "default" member. An implementation already present on `this`
// takes precedence.
var __importStar = (this && this.__importStar) || (function () {
    // Picks the key-listing strategy on first use and reuses it afterwards.
    var listKeys = function (obj) {
        listKeys = Object.getOwnPropertyNames || function (candidate) {
            var names = [];
            for (var key in candidate) {
                if (Object.prototype.hasOwnProperty.call(candidate, key)) {
                    names[names.length] = key;
                }
            }
            return names;
        };
        return listKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                if (keys[idx] !== "default") {
                    __createBinding(namespace, mod, keys[idx]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
2
35
  Object.defineProperty(exports, "__esModule", { value: true });
3
36
  exports.MemoryService = void 0;
4
37
  const uuid_1 = require("uuid");
38
+ const pdf_mjs_1 = require("pdfjs-dist/legacy/build/pdf.mjs");
39
+ const fs = __importStar(require("fs"));
5
40
  class MemoryService {
6
41
  db;
7
42
  embeddings;
@@ -169,7 +204,7 @@ class MemoryService {
169
204
  console.error('[MemoryService] Snapshot created:', snapshotId, stats);
170
205
  return snapshotId;
171
206
  }
172
- async ingestFile(content, format, entityName, entityType = 'Document', chunking = 'paragraphs') {
207
+ async ingestFile(content, format, entityName, entityType = 'Document', chunking = 'paragraphs', filePath) {
173
208
  const searchResults = await this.search(entityName, 1);
174
209
  let entity;
175
210
  if (searchResults.length > 0 && searchResults[0].entity.name.toLowerCase() === entityName.toLowerCase()) {
@@ -179,16 +214,64 @@ class MemoryService {
179
214
  entity = await this.createEntity(entityName, entityType, { format: format });
180
215
  }
181
216
  let chunks = [];
182
- if (format === 'markdown' && chunking === 'paragraphs') {
183
- chunks = content.split(/\n\s*\n/).filter((c) => c.trim().length > 0);
217
+ if (format === 'pdf') {
218
+ try {
219
+ let data;
220
+ // If filePath is provided, read from file
221
+ if (filePath) {
222
+ data = new Uint8Array(fs.readFileSync(filePath));
223
+ }
224
+ else {
225
+ // Otherwise, assume content is base64
226
+ const buffer = Buffer.from(content, 'base64');
227
+ data = new Uint8Array(buffer);
228
+ }
229
+ const loadingTask = (0, pdf_mjs_1.getDocument)({ data });
230
+ const pdf = await loadingTask.promise;
231
+ const numPages = pdf.numPages;
232
+ const pageTextPromises = Array.from({ length: numPages }, async (_, i) => {
233
+ const page = await pdf.getPage(i + 1);
234
+ const textContent = await page.getTextContent();
235
+ return textContent.items.map((item) => item.str).join(' ');
236
+ });
237
+ const pageTexts = await Promise.all(pageTextPromises);
238
+ const text = pageTexts.join('\n');
239
+ if (chunking === 'paragraphs') {
240
+ chunks = text.split(/\n\s*\n/).filter((c) => c.trim().length > 0);
241
+ }
242
+ else {
243
+ chunks = [text];
244
+ }
245
+ }
246
+ catch (e) {
247
+ console.error('[MemoryService] PDF parsing error:', e);
248
+ throw new Error(`Failed to parse PDF: ${e instanceof Error ? e.message : String(e)}`);
249
+ }
250
+ }
251
+ else if (format === 'markdown') {
252
+ // For markdown, also support file path
253
+ let textContent = content;
254
+ if (filePath) {
255
+ textContent = fs.readFileSync(filePath, 'utf-8');
256
+ }
257
+ if (chunking === 'paragraphs') {
258
+ chunks = textContent.split(/\n\s*\n/).filter((c) => c.trim().length > 0);
259
+ }
260
+ else {
261
+ chunks = [textContent];
262
+ }
184
263
  }
185
264
  else if (format === 'json') {
265
+ let textContent = content;
266
+ if (filePath) {
267
+ textContent = fs.readFileSync(filePath, 'utf-8');
268
+ }
186
269
  try {
187
- const data = JSON.parse(content);
270
+ const data = JSON.parse(textContent);
188
271
  chunks = [JSON.stringify(data, null, 2)];
189
272
  }
190
273
  catch (e) {
191
- chunks = [content];
274
+ chunks = [textContent];
192
275
  }
193
276
  }
194
277
  else {
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,52 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const index_1 = require("./index");
4
/**
 * Manual diagnostic for the search stack. Starts a MemoryServer, prints a
 * sample of stored entities and observations, embeds the text 'Alice', runs a
 * raw HNSW query against the `entity:name_semantic` index and finally calls
 * the server's hybrid search with the same term. All output goes to the
 * console; nothing is returned.
 */
async function debugHybridSearch() {
    console.log('Initializing MemoryServer...');
    const server = new index_1.MemoryServer();
    await server.initPromise;

    // Check database content
    console.log('\n=== Database Content ===');
    const entityResult = await server.db.run('?[id, name, type] := *entity{id, name, type, @ "NOW"}');
    console.log(`Entities: ${entityResult.rows.length}`);
    for (const row of entityResult.rows.slice(0, 3)) {
        console.log(` - ${row[1]} (${row[2]})`);
    }
    const observationResult = await server.db.run('?[id, text] := *observation{id, text, @ "NOW"}');
    console.log(`\nObservations: ${observationResult.rows.length}`);
    for (const row of observationResult.rows.slice(0, 3)) {
        console.log(` - ${row[1].substring(0, 60)}...`);
    }

    // Test embedding
    console.log('\n=== Test Embedding ===');
    const testEmbedding = await server.embeddingService.embed('Alice');
    console.log(`Embedding dimensions: ${testEmbedding.length}`);
    console.log(`First 5 values: ${testEmbedding.slice(0, 5)}`);

    // Test HNSW search directly; failures here are reported but do not abort
    // the hybrid-search check below.
    console.log('\n=== Test HNSW Search Directly ===');
    try {
        const hnswQuery = [
            '',
            '?[id, name, type, dist] :=',
            `~entity:name_semantic{id | query: vec([${testEmbedding.join(',')}]), k: 5, bind_distance: dist},`,
            '*entity{id, name, type, @ "NOW"}',
            '',
        ].join('\n');
        const hnswResult = await server.db.run(hnswQuery);
        console.log(`HNSW results: ${hnswResult.rows.length}`);
        for (const row of hnswResult.rows) {
            console.log(` - ${row[1]} (${row[2]}) - distance: ${row[3]}`);
        }
    }
    catch (e) {
        console.error('HNSW error:', e.message);
    }

    // Test hybridSearch
    console.log('\n=== Test HybridSearch ===');
    const hybridOptions = {
        query: 'Alice',
        limit: 5,
        includeEntities: true,
        includeObservations: true
    };
    const searchResult = await server.hybridSearch.search(hybridOptions);
    console.log('HybridSearch result:', JSON.stringify(searchResult, null, 2));
}
52
+ debugHybridSearch().catch(console.error);
@@ -0,0 +1,47 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const index_1 = require("./index");
4
/**
 * Manual end-to-end check of the mutate/query entry points on MemoryServer:
 * creates an entity, attaches an observation to it, then runs three searches
 * ('authentication', 'Alice', 'TypeScript') and prints every response.
 */
async function testMCPSearch() {
    console.log('Initializing MemoryServer...');
    const server = new index_1.MemoryServer();
    await server.initPromise;

    console.log('\n=== Test 1: Create Entity via MCP mutate_memory ===');
    const entityRequest = {
        action: 'create_entity',
        name: 'Test Search Entity',
        type: 'test',
        metadata: { purpose: 'search_test' }
    };
    const createResult = await server.mutateMemory(entityRequest);
    console.log('Created:', createResult);
    const entityId = createResult.id;

    console.log('\n=== Test 2: Add Observation via MCP mutate_memory ===');
    const observationRequest = {
        action: 'add_observation',
        entity_id: entityId,
        text: 'This is a test observation for searching with keywords like authentication and OAuth',
        metadata: { test: true }
    };
    const obsResult = await server.mutateMemory(observationRequest);
    console.log('Added observation:', obsResult);

    // The three search checks differ only in banner, query term and label, so
    // they are driven from a table and executed strictly one after another.
    const searchCases = [
        { banner: '\n=== Test 3: Search via MCP query_memory ===', query: 'authentication', label: 'Search result:' },
        { banner: '\n=== Test 4: Search for "Alice" ===', query: 'Alice', label: 'Alice search result:' },
        { banner: '\n=== Test 5: Search for "TypeScript" ===', query: 'TypeScript', label: 'TypeScript search result:' },
    ];
    for (const { banner, query, label } of searchCases) {
        console.log(banner);
        const outcome = await server.queryMemory({ action: 'search', query, limit: 5 });
        console.log(label, JSON.stringify(outcome, null, 2));
    }
}
47
+ testMCPSearch().catch(console.error);
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,27 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const index_1 = require("./index");
4
/**
 * Manual check of the hybrid search call: searches for 'Alice', prints the raw
 * result as JSON and then reports whether the result is an array (with its
 * length) or an object (with its keys).
 */
async function testSearch() {
    console.log('Initializing MemoryServer...');
    const server = new index_1.MemoryServer();
    await server.initPromise;

    console.log('\n=== Testing Search ===');
    // Test simple search
    const searchOptions = {
        query: 'Alice',
        limit: 5,
        includeEntities: true,
        includeObservations: true
    };
    const result = await server.hybridSearch.search(searchOptions);
    console.log('Search result:', JSON.stringify(result, null, 2));

    // Check if result is array or object
    console.log('\nResult type:', typeof result);
    const resultIsArray = Array.isArray(result);
    console.log('Is array:', resultIsArray);
    if (!resultIsArray) {
        console.log('Result keys:', Object.keys(result));
        return;
    }
    console.log(`Found ${result.length} results`);
}
27
+ testSearch().catch(console.error);
@@ -0,0 +1,44 @@
1
+ "use strict";
2
+ /**
3
+ * Utility functions for timestamp handling
4
+ * Provides both Unix microsecond timestamps and ISO 8601 strings
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.toDualTimestamp = toDualTimestamp;
8
+ exports.nowDual = nowDual;
9
+ exports.parseToDual = parseToDual;
10
/**
 * Convert CozoDB microsecond timestamp to dual format.
 *
 * The ISO string has millisecond precision: the microsecond value is divided
 * by 1000 and floored before formatting. The `timestamp` field echoes the
 * input unchanged.
 *
 * @param {number} microseconds - Microseconds since the Unix epoch.
 * @returns {{timestamp: number, iso: string}} The original value plus its ISO 8601 form.
 * @throws {RangeError} If the value is NaN, non-finite, or outside the range a
 *   JavaScript Date can represent.
 */
function toDualTimestamp(microseconds) {
    const milliseconds = Math.floor(microseconds / 1000);
    const date = new Date(milliseconds);
    // Validate before formatting: toISOString() on an invalid Date throws a
    // generic "Invalid time value" error that does not reveal the bad input.
    if (Number.isNaN(date.getTime())) {
        throw new RangeError(`Invalid microsecond timestamp: ${String(microseconds)}`);
    }
    return {
        timestamp: microseconds,
        iso: date.toISOString()
    };
}
21
/**
 * Get current time in dual format.
 *
 * Reads the clock once so both fields describe the same instant.
 *
 * @returns {{timestamp: number, iso: string}} Current time as microseconds
 *   since the Unix epoch and as an ISO 8601 string.
 */
function nowDual() {
    const epochMs = Date.now();
    const iso = new Date(epochMs).toISOString();
    // Date.now() is in milliseconds; scale up to microseconds.
    return { timestamp: epochMs * 1000, iso };
}
31
/**
 * Parse an ISO string or a numeric timestamp to dual format.
 *
 * Numbers are forwarded to toDualTimestamp and are therefore interpreted as
 * MICROSECONDS since the Unix epoch. Any other input is handed to the Date
 * constructor and the resulting millisecond value is scaled to microseconds.
 * NOTE(review): a number given in seconds or milliseconds would be read as
 * microseconds; confirm callers pass the expected unit.
 *
 * @param {number|string} input - Microsecond timestamp or a date string.
 * @returns {{timestamp: number, iso: string}} Microseconds since the epoch and ISO 8601 string.
 * @throws {RangeError} If the input cannot be turned into a valid date.
 */
function parseToDual(input) {
    if (typeof input === 'number') {
        return toDualTimestamp(input);
    }
    const date = new Date(input);
    const milliseconds = date.getTime();
    // Reject unparseable input explicitly instead of letting toISOString()
    // fail with a generic "Invalid time value" error.
    if (Number.isNaN(milliseconds)) {
        throw new RangeError(`Invalid date input: ${String(input)}`);
    }
    return {
        timestamp: milliseconds * 1000, // Convert to microseconds
        iso: date.toISOString()
    };
}