neotoma 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/README.md +2 -2
  2. package/dist/actions.d.ts.map +1 -1
  3. package/dist/actions.js +322 -62
  4. package/dist/actions.js.map +1 -1
  5. package/dist/cli/commands/mirror.d.ts +40 -1
  6. package/dist/cli/commands/mirror.d.ts.map +1 -1
  7. package/dist/cli/commands/mirror.js +180 -0
  8. package/dist/cli/commands/mirror.js.map +1 -1
  9. package/dist/cli/config.d.ts.map +1 -1
  10. package/dist/cli/config.js +6 -0
  11. package/dist/cli/config.js.map +1 -1
  12. package/dist/cli/doctor.d.ts +21 -1
  13. package/dist/cli/doctor.d.ts.map +1 -1
  14. package/dist/cli/doctor.js +32 -0
  15. package/dist/cli/doctor.js.map +1 -1
  16. package/dist/cli/index.d.ts.map +1 -1
  17. package/dist/cli/index.js +493 -33
  18. package/dist/cli/index.js.map +1 -1
  19. package/dist/mcp_server_card.d.ts +6 -0
  20. package/dist/mcp_server_card.d.ts.map +1 -0
  21. package/dist/mcp_server_card.js +91 -0
  22. package/dist/mcp_server_card.js.map +1 -0
  23. package/dist/repositories/sqlite/sqlite_client.d.ts.map +1 -1
  24. package/dist/repositories/sqlite/sqlite_client.js +10 -1
  25. package/dist/repositories/sqlite/sqlite_client.js.map +1 -1
  26. package/dist/server.d.ts +7 -0
  27. package/dist/server.d.ts.map +1 -1
  28. package/dist/server.js +195 -19
  29. package/dist/server.js.map +1 -1
  30. package/dist/services/canonical_mirror_git.d.ts.map +1 -1
  31. package/dist/services/canonical_mirror_git.js +17 -3
  32. package/dist/services/canonical_mirror_git.js.map +1 -1
  33. package/dist/services/dashboard_stats.d.ts +13 -0
  34. package/dist/services/dashboard_stats.d.ts.map +1 -1
  35. package/dist/services/dashboard_stats.js +27 -0
  36. package/dist/services/dashboard_stats.js.map +1 -1
  37. package/dist/services/duplicate_detection.d.ts +50 -0
  38. package/dist/services/duplicate_detection.d.ts.map +1 -0
  39. package/dist/services/duplicate_detection.js +136 -0
  40. package/dist/services/duplicate_detection.js.map +1 -0
  41. package/dist/services/entity_queries.d.ts +4 -0
  42. package/dist/services/entity_queries.d.ts.map +1 -1
  43. package/dist/services/entity_queries.js +13 -1
  44. package/dist/services/entity_queries.js.map +1 -1
  45. package/dist/services/entity_resolution.d.ts +148 -8
  46. package/dist/services/entity_resolution.d.ts.map +1 -1
  47. package/dist/services/entity_resolution.js +341 -107
  48. package/dist/services/entity_resolution.js.map +1 -1
  49. package/dist/services/interpretation.d.ts.map +1 -1
  50. package/dist/services/interpretation.js +19 -6
  51. package/dist/services/interpretation.js.map +1 -1
  52. package/dist/services/observation_storage.d.ts +11 -0
  53. package/dist/services/observation_storage.d.ts.map +1 -1
  54. package/dist/services/observation_storage.js +6 -0
  55. package/dist/services/observation_storage.js.map +1 -1
  56. package/dist/services/recent_record_activity.d.ts +15 -0
  57. package/dist/services/recent_record_activity.d.ts.map +1 -0
  58. package/dist/services/recent_record_activity.js +135 -0
  59. package/dist/services/recent_record_activity.js.map +1 -0
  60. package/dist/services/schema_definitions.d.ts.map +1 -1
  61. package/dist/services/schema_definitions.js +228 -0
  62. package/dist/services/schema_definitions.js.map +1 -1
  63. package/dist/services/schema_registry.d.ts +86 -5
  64. package/dist/services/schema_registry.d.ts.map +1 -1
  65. package/dist/services/schema_registry.js +99 -8
  66. package/dist/services/schema_registry.js.map +1 -1
  67. package/dist/services/snapshot_computation.d.ts.map +1 -1
  68. package/dist/services/snapshot_computation.js +102 -0
  69. package/dist/services/snapshot_computation.js.map +1 -1
  70. package/dist/shared/action_handlers/entity_handlers.d.ts +2 -0
  71. package/dist/shared/action_handlers/entity_handlers.d.ts.map +1 -1
  72. package/dist/shared/action_handlers/entity_handlers.js +16 -2
  73. package/dist/shared/action_handlers/entity_handlers.js.map +1 -1
  74. package/dist/shared/action_handlers/entity_identifier_handler.d.ts +22 -0
  75. package/dist/shared/action_handlers/entity_identifier_handler.d.ts.map +1 -1
  76. package/dist/shared/action_handlers/entity_identifier_handler.js +122 -9
  77. package/dist/shared/action_handlers/entity_identifier_handler.js.map +1 -1
  78. package/dist/shared/action_schemas.d.ts +75 -12
  79. package/dist/shared/action_schemas.d.ts.map +1 -1
  80. package/dist/shared/action_schemas.js +17 -0
  81. package/dist/shared/action_schemas.js.map +1 -1
  82. package/dist/shared/contract_mappings.d.ts.map +1 -1
  83. package/dist/shared/contract_mappings.js +18 -0
  84. package/dist/shared/contract_mappings.js.map +1 -1
  85. package/dist/shared/openapi_types.d.ts +281 -1
  86. package/dist/shared/openapi_types.d.ts.map +1 -1
  87. package/dist/tool_definitions.d.ts +1 -1
  88. package/dist/tool_definitions.d.ts.map +1 -1
  89. package/dist/tool_definitions.js +84 -1
  90. package/dist/tool_definitions.js.map +1 -1
  91. package/openapi.yaml +306 -1
  92. package/package.json +2 -2
package/README.md CHANGED
@@ -79,7 +79,7 @@ Most AI memory systems optimize storage or retrieval. Neotoma enforces state int
79
79
  Ask your AI agent whether Neotoma fits your workflow. Paste this into Claude, Cursor, ChatGPT, or any MCP-capable tool:
80
80
 
81
81
  ```
82
- Read https://neotoma.io/evaluate. Before you answer, inspect any broader tool, workspace, and configuration context you can access so you assess my real workflow rather than just this chat. Then tell me whether Neotoma is a real fit for my workflow, what data would be highest value to persist first, and what I should do next.
82
+ Read https://neotoma.io/evaluate. Use any tool, workspace, and chat context already available in this session to assess my real workflow. Avoid ad-hoc shell introspection just to profile me; if Neotoma is already installed, use `neotoma doctor --json` for status instead. Then tell me whether Neotoma is a real fit for my workflow, what data would be highest value to persist first, and what I should do next.
83
83
  ```
84
84
 
85
85
  The agent reads the evaluation page, checks your context, and gives you an honest assessment. If it's a fit, the agent carries you through install and activation.
@@ -89,7 +89,7 @@ The agent reads the evaluation page, checks your context, and gives you an hones
89
89
  Agents install Neotoma themselves. Paste this prompt into Claude, Cursor, ChatGPT, or Codex:
90
90
 
91
91
  ```
92
- Read https://neotoma.io/install and carry me through the install-first Neotoma flow. Install, activate it with my data, and configure my current tool for robust ongoing use.
92
+ Read https://neotoma.io/install and carry me through the install-first Neotoma flow. Use `neotoma` commands for status and setup. If Neotoma is already installed, start with `neotoma doctor --json`; otherwise install it and run `neotoma setup --tool <my_tool> --yes`. Avoid ad-hoc shell introspection or arbitrary repo scripts. Then activate Neotoma with my data and configure my current tool for robust ongoing use.
93
93
  ```
94
94
 
95
95
  The agent handles npm install, initialization, and MCP configuration. **Manual install:**
@@ -1 +1 @@
1
- {"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAoF9B,eAAO,MAAM,GAAG,6CAAY,CAAC;AAwL7B,yGAAyG;AACzG,wBAAgB,cAAc,CAAC,GAAG,EAAE,OAAO,CAAC,OAAO,GAAG,OAAO,CAK5D;AA2pJD,wBAAsB,eAAe;;;eAgCpC"}
1
+ {"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAsF9B,eAAO,MAAM,GAAG,6CAAY,CAAC;AA2M7B,yGAAyG;AACzG,wBAAgB,cAAc,CAAC,GAAG,EAAE,OAAO,CAAC,OAAO,GAAG,OAAO,CAK5D;AA2gKD,wBAAsB,eAAe;;;eAgCpC"}
package/dist/actions.js CHANGED
@@ -31,6 +31,8 @@ import { queryEntitiesWithCount } from "./shared/action_handlers/entity_handlers
31
31
  import { retrieveEntityByIdentifierWithFallback } from "./shared/action_handlers/entity_identifier_handler.js";
32
32
  import { prepareEntitySnapshotWithEmbedding, upsertEntitySnapshotWithEmbedding, } from "./services/entity_snapshot_embedding.js";
33
33
  import { readOpenApiActionsFile, readOpenApiFile } from "./shared/openapi_file.js";
34
+ import { buildSmitheryServerCard } from "./mcp_server_card.js";
35
+ import { listRecentRecordActivity } from "./services/recent_record_activity.js";
34
36
  export const app = express();
35
37
  // Trust proxy headers (required for express-rate-limit when X-Forwarded-For is present)
36
38
  app.set("trust proxy", true);
@@ -100,6 +102,25 @@ const oauthRegisterLimit = rateLimit({
100
102
  });
101
103
  // Favicon (no-auth) to avoid 401 noise when not present on disk
102
104
  app.get("/favicon.ico", (_req, res) => res.status(204).end());
105
+ // Smithery / MCP registry static metadata when automatic scan cannot finish (same host as /mcp)
106
+ app.get("/.well-known/mcp/server-card.json", (_req, res) => {
107
+ const override = process.env.NEOTOMA_MCP_SERVER_CARD_JSON?.trim();
108
+ if (override) {
109
+ try {
110
+ const parsed = JSON.parse(override);
111
+ res.type("application/json");
112
+ return res.json(parsed);
113
+ }
114
+ catch {
115
+ return res.status(500).type("application/json").json({
116
+ error: "invalid_server_card_json",
117
+ error_description: "NEOTOMA_MCP_SERVER_CARD_JSON is not valid JSON",
118
+ });
119
+ }
120
+ }
121
+ res.type("application/json");
122
+ res.json(buildSmitheryServerCard());
123
+ });
103
124
  // ============================================================================
104
125
  // OAuth discovery (RFC 8414 / MCP Authorization) for Cursor and other clients
105
126
  // ============================================================================
@@ -1552,7 +1573,7 @@ app.post("/entities/query", async (req, res) => {
1552
1573
  try {
1553
1574
  // Get authenticated user_id (REQUIRED)
1554
1575
  const userId = await getAuthenticatedUserId(req, parsed.data.user_id);
1555
- const { entity_type, search, limit, offset, sort_by, sort_order, published, published_after, published_before, include_snapshots, include_merged, } = parsed.data;
1576
+ const { entity_type, search, limit, offset, sort_by, sort_order, published, published_after, published_before, include_snapshots, include_merged, updated_since, created_since, } = parsed.data;
1556
1577
  const { entities, total } = await queryEntitiesWithCount({
1557
1578
  userId,
1558
1579
  entityType: entity_type,
@@ -1566,6 +1587,8 @@ app.post("/entities/query", async (req, res) => {
1566
1587
  search,
1567
1588
  limit,
1568
1589
  offset,
1590
+ updatedSince: updated_since,
1591
+ createdSince: created_since,
1569
1592
  });
1570
1593
  return res.json({
1571
1594
  entities,
@@ -2304,6 +2327,20 @@ app.get("/timeline/:id", async (req, res) => {
2304
2327
  return sendError(res, 500, "DB_QUERY_FAILED", message, errorDetails);
2305
2328
  }
2306
2329
  });
2330
+ // GET /api/record_activity - Cross-table recent rows for inspector (ordered by latest timestamps)
2331
+ // REQUIRES AUTHENTICATION
2332
+ app.get("/record_activity", async (req, res) => {
2333
+ try {
2334
+ const userId = await getAuthenticatedUserId(req, req.query.user_id);
2335
+ const limit = parseInt(String(req.query.limit ?? "50"), 10) || 50;
2336
+ const offset = parseInt(String(req.query.offset ?? "0"), 10) || 0;
2337
+ const result = listRecentRecordActivity(userId, limit, offset);
2338
+ return res.json(result);
2339
+ }
2340
+ catch (error) {
2341
+ return handleApiError(req, res, error, "Failed to list recent record activity", "DB_QUERY_FAILED", "APIError:record_activity");
2342
+ }
2343
+ });
2307
2344
  // GET /api/sources - Get source list (FU-301)
2308
2345
  app.get("/sources", async (req, res) => {
2309
2346
  try {
@@ -2620,8 +2657,10 @@ app.post("/observations/create", async (req, res) => {
2620
2657
  }
2621
2658
  });
2622
2659
  async function storeStructuredForApi(params) {
2623
- const { userId, entities, sourcePriority, idempotencyKey, originalFilename } = params;
2624
- const { resolveEntity } = await import("./services/entity_resolution.js");
2660
+ const { userId, entities, sourcePriority, idempotencyKey, originalFilename, relationships, commit: commitInput, strict: strictInput, } = params;
2661
+ const commit = commitInput !== false;
2662
+ const strict = strictInput === true;
2663
+ const { resolveEntityWithTrace, CanonicalNameUnresolvedError, MergeRefusedError, } = await import("./services/entity_resolution.js");
2625
2664
  const { detectFlatPackedRows, FlatPackedRowsError } = await import("./services/flat_packed_detection.js");
2626
2665
  // Reject flat-packed rows (whole tables smuggled into a single entity as
2627
2666
  // `<prefix>_<index>_<suffix>` keys). These cannot produce per-row snapshots
@@ -2678,12 +2717,23 @@ async function storeStructuredForApi(params) {
2678
2717
  source_priority: sourcePriority,
2679
2718
  },
2680
2719
  });
2681
- const createdEntities = [];
2682
- for (const entityData of entities) {
2720
+ const resolved = [];
2721
+ const issues = [];
2722
+ for (let observation_index = 0; observation_index < entities.length; observation_index++) {
2723
+ const entityData = entities[observation_index];
2683
2724
  let entity_type = entityData.entity_type;
2684
2725
  if (!entity_type) {
2685
2726
  throw new Error("entity_type is required for each entity");
2686
2727
  }
2728
+ // Per-entity overrides: `intent: "create_new"` is shorthand for strict on
2729
+ // this record; `target_id` forces extend mode (bypass derivation).
2730
+ const intent = typeof entityData.intent === "string"
2731
+ ? entityData.intent
2732
+ : undefined;
2733
+ const targetId = typeof entityData.target_id === "string"
2734
+ ? entityData.target_id
2735
+ : undefined;
2736
+ const effectiveStrict = strict || intent === "create_new";
2687
2737
  // Schema-agnostic duplicate-type collapse (e.g. `places` -> `place`,
2688
2738
  // aliased-type -> canonical). Applied before storing so the resolved
2689
2739
  // entity_id hashes into the canonical type rather than a near-duplicate.
@@ -2703,66 +2753,182 @@ async function storeStructuredForApi(params) {
2703
2753
  catch (equivErr) {
2704
2754
  logger.warn(`Equivalence check failed for ${entity_type}: ${equivErr instanceof Error ? equivErr.message : String(equivErr)}`);
2705
2755
  }
2706
- // eslint-disable-next-line @typescript-eslint/no-unused-vars -- intentional omit
2707
- const { entity_type: _removed, ...fields } = entityData;
2708
- const entity_id = await resolveEntity({
2709
- entityType: entity_type,
2710
- fields,
2711
- userId,
2712
- });
2713
- const obsData = await createObservation({
2714
- entity_id,
2715
- entity_type,
2716
- schema_version: "1.0",
2717
- source_id: storageResult.sourceId,
2718
- interpretation_id: null,
2719
- observed_at: new Date().toISOString(),
2720
- specificity_score: 1.0,
2721
- source_priority: sourcePriority,
2722
- fields,
2723
- user_id: userId,
2724
- });
2756
+ const { entity_type: _removedType, intent: _removedIntent, target_id: _removedTargetId, ...fields } = entityData;
2757
+ void _removedType;
2758
+ void _removedIntent;
2759
+ void _removedTargetId;
2725
2760
  try {
2726
- const { recomputeSnapshot } = await import("./services/snapshot_computation.js");
2727
- await recomputeSnapshot(entity_id, userId);
2728
- }
2729
- catch (snapshotErr) {
2730
- logger.warn(`Snapshot recompute failed for ${entity_id}: ${snapshotErr}`);
2761
+ const result = await resolveEntityWithTrace({
2762
+ entityType: entity_type,
2763
+ fields,
2764
+ userId,
2765
+ commit,
2766
+ strict: effectiveStrict,
2767
+ targetId,
2768
+ });
2769
+ resolved.push({
2770
+ observation_index,
2771
+ entity_type,
2772
+ entity_id: result.entityId,
2773
+ fields,
2774
+ trace: {
2775
+ canonical_name: result.trace.canonicalName,
2776
+ resolver_path: result.trace.path,
2777
+ identity_basis: result.trace.identityBasis,
2778
+ identity_rule: result.trace.identityRule,
2779
+ action: result.trace.action,
2780
+ },
2781
+ intent,
2782
+ targetId,
2783
+ });
2731
2784
  }
2732
- // Schema-driven auto-linking: if the entity's active schema declares
2733
- // reference_fields, create typed relationships to the referenced
2734
- // entities (REFERS_TO by default). Silent fallback when no schema or no
2735
- // match exists — never invent targets.
2736
- try {
2737
- const { schemaRegistry } = await import("./services/schema_registry.js");
2738
- const schemaEntry = await schemaRegistry.loadActiveSchema(entity_type, userId);
2739
- if (schemaEntry?.schema_definition?.reference_fields?.length) {
2740
- const { autoLinkReferenceFields } = await import("./services/schema_reference_linking.js");
2741
- await autoLinkReferenceFields({
2742
- entityId: entity_id,
2743
- entityType: entity_type,
2744
- fields,
2745
- schema: schemaEntry.schema_definition,
2746
- userId,
2747
- sourceId: storageResult.sourceId,
2785
+ catch (err) {
2786
+ if (err instanceof CanonicalNameUnresolvedError) {
2787
+ issues.push({
2788
+ observation_index,
2789
+ entity_type,
2790
+ code: "ERR_CANONICAL_NAME_UNRESOLVED",
2791
+ message: err.message,
2792
+ details: {
2793
+ seen_fields: err.seenFields,
2794
+ attempted_value: err.attemptedValue,
2795
+ },
2796
+ });
2797
+ }
2798
+ else if (err instanceof MergeRefusedError) {
2799
+ issues.push({
2800
+ observation_index,
2801
+ entity_type,
2802
+ code: "ERR_MERGE_REFUSED",
2803
+ message: err.message,
2804
+ details: {
2805
+ entity_id: err.entityId,
2806
+ canonical_name: err.canonicalName,
2807
+ resolver_path: err.resolverPath,
2808
+ },
2748
2809
  });
2749
2810
  }
2811
+ else {
2812
+ throw err;
2813
+ }
2750
2814
  }
2751
- catch (linkErr) {
2752
- logger.warn(`Auto-link reference fields failed for ${entity_type}/${entity_id}: ${linkErr instanceof Error ? linkErr.message : String(linkErr)}`);
2815
+ }
2816
+ if (issues.length > 0) {
2817
+ const aggregate = new Error(`Structured store refused: ${issues.length} observation(s) failed resolution.`);
2818
+ aggregate.code = "ERR_STORE_RESOLUTION_FAILED";
2819
+ aggregate.issues = issues;
2820
+ throw aggregate;
2821
+ }
2822
+ const createdEntities = [];
2823
+ for (const r of resolved) {
2824
+ let observation_id = null;
2825
+ let snapshotAfter = null;
2826
+ if (commit) {
2827
+ const obsData = await createObservation({
2828
+ entity_id: r.entity_id,
2829
+ entity_type: r.entity_type,
2830
+ schema_version: "1.0",
2831
+ source_id: storageResult.sourceId,
2832
+ interpretation_id: null,
2833
+ observed_at: new Date().toISOString(),
2834
+ specificity_score: 1.0,
2835
+ source_priority: sourcePriority,
2836
+ fields: r.fields,
2837
+ user_id: userId,
2838
+ identity_basis: r.trace.identity_basis,
2839
+ identity_rule: r.trace.identity_rule,
2840
+ });
2841
+ observation_id = obsData.id;
2842
+ try {
2843
+ const { recomputeSnapshot } = await import("./services/snapshot_computation.js");
2844
+ const snap = await recomputeSnapshot(r.entity_id, userId);
2845
+ snapshotAfter =
2846
+ snap
2847
+ ?.snapshot ?? null;
2848
+ }
2849
+ catch (snapshotErr) {
2850
+ logger.warn(`Snapshot recompute failed for ${r.entity_id}: ${snapshotErr}`);
2851
+ }
2852
+ // Schema-driven auto-linking: if the entity's active schema declares
2853
+ // reference_fields, create typed relationships to the referenced
2854
+ // entities (REFERS_TO by default). Silent fallback when no schema or no
2855
+ // match exists — never invent targets.
2856
+ try {
2857
+ const { schemaRegistry } = await import("./services/schema_registry.js");
2858
+ const schemaEntry = await schemaRegistry.loadActiveSchema(r.entity_type, userId);
2859
+ if (schemaEntry?.schema_definition?.reference_fields?.length) {
2860
+ const { autoLinkReferenceFields } = await import("./services/schema_reference_linking.js");
2861
+ await autoLinkReferenceFields({
2862
+ entityId: r.entity_id,
2863
+ entityType: r.entity_type,
2864
+ fields: r.fields,
2865
+ schema: schemaEntry.schema_definition,
2866
+ userId,
2867
+ sourceId: storageResult.sourceId,
2868
+ });
2869
+ }
2870
+ }
2871
+ catch (linkErr) {
2872
+ logger.warn(`Auto-link reference fields failed for ${r.entity_type}/${r.entity_id}: ${linkErr instanceof Error ? linkErr.message : String(linkErr)}`);
2873
+ }
2753
2874
  }
2754
2875
  createdEntities.push({
2755
- entity_id,
2756
- entity_type,
2757
- observation_id: obsData.id,
2876
+ entity_id: r.entity_id,
2877
+ entity_type: r.entity_type,
2878
+ observation_id,
2879
+ observation_index: r.observation_index,
2880
+ action: r.trace.action,
2881
+ canonical_name: r.trace.canonical_name,
2882
+ resolver_path: r.trace.resolver_path,
2883
+ identity_basis: r.trace.identity_basis,
2884
+ identity_rule: r.trace.identity_rule,
2885
+ entity_snapshot_after: snapshotAfter,
2758
2886
  });
2759
2887
  }
2888
+ // Relationships (parity with MCP store_structured). Indices are resolved
2889
+ // against the observation order; commit=false skips creation.
2890
+ const relationshipsCreated = [];
2891
+ if (commit && relationships && relationships.length > 0) {
2892
+ const { relationshipsService } = await import("./services/relationships.js");
2893
+ for (const rel of relationships) {
2894
+ const source = resolved[rel.source_index];
2895
+ const target = resolved[rel.target_index];
2896
+ if (!source || !target) {
2897
+ logger.warn(`[STORE] Skipping relationship: invalid source_index=${rel.source_index} ` +
2898
+ `or target_index=${rel.target_index} (have ${resolved.length} entities).`);
2899
+ continue;
2900
+ }
2901
+ try {
2902
+ await relationshipsService.createRelationship({
2903
+ source_entity_id: source.entity_id,
2904
+ target_entity_id: target.entity_id,
2905
+ relationship_type: rel.relationship_type,
2906
+ source_id: storageResult.sourceId,
2907
+ user_id: userId,
2908
+ });
2909
+ relationshipsCreated.push({
2910
+ relationship_type: rel.relationship_type,
2911
+ source_entity_id: source.entity_id,
2912
+ target_entity_id: target.entity_id,
2913
+ });
2914
+ }
2915
+ catch (relErr) {
2916
+ logger.warn(`Failed to create relationship ${rel.relationship_type} ` +
2917
+ `${source.entity_id} -> ${target.entity_id}: ${relErr instanceof Error ? relErr.message : String(relErr)}`);
2918
+ }
2919
+ }
2920
+ }
2760
2921
  return {
2761
2922
  success: true,
2762
- source_id: storageResult.sourceId,
2763
- entities_created: createdEntities.length,
2764
- observations_created: createdEntities.length,
2923
+ commit,
2924
+ source_id: commit ? storageResult.sourceId : null,
2925
+ entities_created: commit
2926
+ ? createdEntities.filter((e) => e.action === "created" || e.action === "extended")
2927
+ .length
2928
+ : 0,
2929
+ observations_created: commit ? createdEntities.length : 0,
2765
2930
  entities: createdEntities,
2931
+ relationships_created: relationshipsCreated,
2766
2932
  };
2767
2933
  }
2768
2934
  async function storeUnstructuredForApi(params) {
@@ -2817,6 +2983,9 @@ app.post("/store", async (req, res) => {
2817
2983
  sourcePriority: parsed.data.source_priority ?? 100,
2818
2984
  idempotencyKey: parsed.data.idempotency_key,
2819
2985
  originalFilename: parsed.data.original_filename,
2986
+ relationships: parsed.data.relationships,
2987
+ commit: parsed.data.commit,
2988
+ strict: parsed.data.strict,
2820
2989
  });
2821
2990
  }
2822
2991
  if (hasUnstructured) {
@@ -2871,6 +3040,21 @@ app.post("/store", async (req, res) => {
2871
3040
  logWarn("EntityTypeGuardError:store", req, { code: errCode, message });
2872
3041
  return sendError(res, 400, errCode, message);
2873
3042
  }
3043
+ if (error &&
3044
+ typeof error === "object" &&
3045
+ errCode === "ERR_STORE_RESOLUTION_FAILED") {
3046
+ const err = error;
3047
+ logWarn("StoreResolutionError:store", req, {
3048
+ issue_count: err.issues?.length ?? 0,
3049
+ });
3050
+ return res.status(400).json({
3051
+ error: {
3052
+ code: "ERR_STORE_RESOLUTION_FAILED",
3053
+ message: err.message,
3054
+ issues: err.issues ?? [],
3055
+ },
3056
+ });
3057
+ }
2874
3058
  if (error &&
2875
3059
  typeof error === "object" &&
2876
3060
  errCode === "ERR_FLAT_PACKED_ROWS") {
@@ -2934,7 +3118,7 @@ app.post("/observations/query", async (req, res) => {
2934
3118
  try {
2935
3119
  // Get authenticated user_id (REQUIRED)
2936
3120
  const userId = await getAuthenticatedUserId(req, parsed.data.user_id);
2937
- const { observation_id, entity_id, entity_type, source_id, limit, offset } = parsed.data;
3121
+ const { observation_id, entity_id, entity_type, source_id, limit, offset, updated_since, created_since, } = parsed.data;
2938
3122
  // Build query - ALWAYS filter by authenticated user_id
2939
3123
  let query = db.from("observations").select("*", { count: "exact" }).eq("user_id", userId); // SECURITY: Always filter by authenticated user
2940
3124
  if (observation_id) {
@@ -2949,6 +3133,14 @@ app.post("/observations/query", async (req, res) => {
2949
3133
  if (source_id) {
2950
3134
  query = query.eq("source_id", source_id);
2951
3135
  }
3136
+ if (updated_since) {
3137
+ // Observations are immutable; treat updated_since as a synonym for
3138
+ // observed_at >= updated_since so clients have a single "since" knob.
3139
+ query = query.gte("observed_at", updated_since);
3140
+ }
3141
+ if (created_since) {
3142
+ query = query.gte("observed_at", created_since);
3143
+ }
2952
3144
  query = query.order("observed_at", { ascending: false }).range(offset, offset + limit - 1);
2953
3145
  const { data, error, count } = await query;
2954
3146
  if (error)
@@ -2969,6 +3161,49 @@ app.post("/observations/query", async (req, res) => {
2969
3161
  return sendError(res, 500, "DB_QUERY_FAILED", message);
2970
3162
  }
2971
3163
  });
3164
+ // GET /entities/duplicates - List candidate duplicate entity pairs (R5).
3165
+ // Read-only fuzzy post-hoc detector. Never auto-merges. Hands off to
3166
+ // /entities/merge once an operator or agent confirms a pair.
3167
+ app.get("/entities/duplicates", async (req, res) => {
3168
+ try {
3169
+ const entityType = typeof req.query.entity_type === "string" ? req.query.entity_type : undefined;
3170
+ if (!entityType) {
3171
+ return sendError(res, 400, "VALIDATION_INVALID_FORMAT", "entity_type query parameter is required");
3172
+ }
3173
+ const providedUserId = typeof req.query.user_id === "string" ? req.query.user_id : undefined;
3174
+ const authenticatedUserId = await getAuthenticatedUserId(req, providedUserId);
3175
+ if (providedUserId && providedUserId !== authenticatedUserId) {
3176
+ return sendError(res, 403, "FORBIDDEN", "user_id does not match authenticated user.");
3177
+ }
3178
+ const thresholdRaw = typeof req.query.threshold === "string" ? req.query.threshold : undefined;
3179
+ const limitRaw = typeof req.query.limit === "string" ? req.query.limit : undefined;
3180
+ const threshold = thresholdRaw ? Number(thresholdRaw) : undefined;
3181
+ if (threshold !== undefined && (Number.isNaN(threshold) || threshold <= 0 || threshold > 1)) {
3182
+ return sendError(res, 400, "VALIDATION_INVALID_FORMAT", "threshold must be a number in (0, 1]");
3183
+ }
3184
+ const limit = limitRaw ? Number(limitRaw) : undefined;
3185
+ if (limit !== undefined && (!Number.isFinite(limit) || limit < 1 || limit > 200)) {
3186
+ return sendError(res, 400, "VALIDATION_INVALID_FORMAT", "limit must be an integer in [1, 200]");
3187
+ }
3188
+ const { findDuplicateCandidates } = await import("./services/duplicate_detection.js");
3189
+ const candidates = await findDuplicateCandidates({
3190
+ entityType,
3191
+ userId: authenticatedUserId,
3192
+ threshold,
3193
+ limit,
3194
+ });
3195
+ return res.json({
3196
+ candidates,
3197
+ entity_type: entityType,
3198
+ threshold: threshold ?? null,
3199
+ });
3200
+ }
3201
+ catch (error) {
3202
+ logError("APIError:entities_duplicates", req, error);
3203
+ const message = error instanceof Error ? error.message : "Failed to list potential duplicates";
3204
+ return sendError(res, 500, "DB_QUERY_FAILED", message);
3205
+ }
3206
+ });
2972
3207
  // POST /api/entities/merge - Merge duplicate entities
2973
3208
  // REQUIRES AUTHENTICATION - validates user_id matches authenticated user and entities belong to user
2974
3209
  app.post("/entities/merge", async (req, res) => {
@@ -3044,14 +3279,20 @@ app.post("/list_observations", async (req, res) => {
3044
3279
  });
3045
3280
  return sendValidationError(res, parsed.error.issues);
3046
3281
  }
3047
- const { entity_id, limit = 100, offset = 0 } = parsed.data;
3048
- const query = db
3282
+ const { entity_id, limit = 100, offset = 0, updated_since, created_since } = parsed.data;
3283
+ let query = db
3049
3284
  .from("observations")
3050
3285
  .select("*")
3051
- .eq("entity_id", entity_id)
3286
+ .eq("entity_id", entity_id);
3287
+ if (updated_since) {
3288
+ query = query.gte("observed_at", updated_since);
3289
+ }
3290
+ if (created_since) {
3291
+ query = query.gte("observed_at", created_since);
3292
+ }
3293
+ const { data, error } = await query
3052
3294
  .order("observed_at", { ascending: false })
3053
3295
  .range(offset, offset + limit - 1);
3054
- const { data, error } = await query;
3055
3296
  if (error) {
3056
3297
  logError("DbError:list_observations", req, error);
3057
3298
  return sendError(res, 500, "DB_QUERY_FAILED", error.message);
@@ -3219,13 +3460,16 @@ app.post("/retrieve_entity_by_identifier", async (req, res) => {
3219
3460
  return sendValidationError(res, parsed.error.issues);
3220
3461
  }
3221
3462
  try {
3222
- const { identifier, entity_type } = parsed.data;
3223
- const userId = await getAuthenticatedUserId(req, undefined);
3463
+ const { identifier, entity_type, by, limit, include_observations, observations_limit } = parsed.data;
3464
+ const userId = await getAuthenticatedUserId(req, parsed.data.user_id);
3224
3465
  const result = await retrieveEntityByIdentifierWithFallback({
3225
3466
  identifier,
3226
3467
  entityType: entity_type,
3227
3468
  userId,
3228
- limit: 100,
3469
+ limit: limit ?? 100,
3470
+ by,
3471
+ includeObservations: include_observations,
3472
+ observationsLimit: observations_limit,
3229
3473
  });
3230
3474
  logDebug("Success:retrieve_entity_by_identifier", req, {
3231
3475
  identifier,
@@ -3686,12 +3930,28 @@ app.post("/register_schema", async (req, res) => {
3686
3930
  const userId = await getAuthenticatedUserId(req, parsed.data.user_id);
3687
3931
  const { entity_type, schema_definition, reducer_config, schema_version = "1.0", user_specific = false, activate = false, force = false, } = parsed.data;
3688
3932
  const { schemaRegistry } = await import("./services/schema_registry.js");
3933
+ // R2 back-compat: existing HTTP/CLI callers may register schemas
3934
+ // without declaring canonical_name_fields or identity_opt_out (e.g. the
3935
+ // bootstrap path from `neotoma schemas register`). Default to an
3936
+ // explicit identity_opt_out so registration still succeeds while
3937
+ // surfacing the gap loudly via startup logs and stats. Clients that
3938
+ // want strong identity should set `canonical_name_fields` on the
3939
+ // request payload.
3940
+ const definitionWithIdentity = (() => {
3941
+ const def = schema_definition;
3942
+ if (!def || typeof def !== "object")
3943
+ return schema_definition;
3944
+ if (def.canonical_name_fields || def.identity_opt_out)
3945
+ return schema_definition;
3946
+ logWarn("DefaultIdentityOptOut:register_schema", req, { entity_type });
3947
+ return { ...def, identity_opt_out: "heuristic_canonical_name" };
3948
+ })();
3689
3949
  let newSchema;
3690
3950
  try {
3691
3951
  newSchema = await schemaRegistry.register({
3692
3952
  entity_type,
3693
3953
  schema_version,
3694
- schema_definition: schema_definition,
3954
+ schema_definition: definitionWithIdentity,
3695
3955
  reducer_config: (reducer_config || { merge_policies: {} }),
3696
3956
  user_id: userId,
3697
3957
  user_specific,