@velvetmonkey/vault-core 2.0.89 → 2.0.91

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ /**
2
+ * AST-based protected zone detection
3
+ *
4
+ * Uses mdast tree to compute ProtectedZone[] more accurately than regex,
5
+ * especially for nested callouts, tables, and multi-line HTML comments.
6
+ *
7
+ * Supplemental regex handles Obsidian-specific syntax not in mdast:
8
+ * [[wikilinks]], bare URLs, #hashtags, %%comments%%
9
+ */
10
+ import type { Root } from 'mdast';
11
+ import type { ProtectedZone } from './types.js';
12
+ /**
13
+ * Extract protected zones from an mdast tree.
14
+ */
15
+ export declare function getProtectedZonesFromAst(tree: Root, content: string): ProtectedZone[];
16
+ //# sourceMappingURL=astProtectedZones.d.ts.map
@@ -0,0 +1,129 @@
1
+ /**
2
+ * AST-based protected zone detection
3
+ *
4
+ * Uses mdast tree to compute ProtectedZone[] more accurately than regex,
5
+ * especially for nested callouts, tables, and multi-line HTML comments.
6
+ *
7
+ * Supplemental regex handles Obsidian-specific syntax not in mdast:
8
+ * [[wikilinks]], bare URLs, #hashtags, %%comments%%
9
+ */
10
+ import { visit, SKIP } from 'unist-util-visit';
11
/**
 * Read the absolute character offsets of a node's source span.
 *
 * @param node - Node object that may carry a `position` with `start`/`end` points.
 * @returns `{ start, end }` when both `position.start.offset` and
 *   `position.end.offset` are numbers; otherwise `null`.
 */
function getOffset(node) {
    const span = node.position;
    const from = span?.start;
    const to = span?.end;
    // Either endpoint missing: there is no usable span.
    if (!from || !to) {
        return null;
    }
    // Points may exist without a numeric `offset`; treat that as "no span" too.
    if (typeof from.offset !== 'number' || typeof to.offset !== 'number') {
        return null;
    }
    return { start: from.offset, end: to.offset };
}
21
/**
 * Build a protected-zone record.
 *
 * @param start - Offset where the zone begins.
 * @param end - Offset where the zone ends.
 * @param type - Zone type label.
 * @returns A new object with exactly the `start`, `end` and `type` fields.
 */
function zone(start, end, type) {
    const record = { start: start, end: end, type: type };
    return record;
}
24
/**
 * Decide whether a node is an Obsidian callout, i.e. a blockquote whose
 * first paragraph begins with a `[!type]` marker.
 *
 * Only the very first inline child of the very first paragraph is inspected,
 * and it must be a plain `text` node with a string `value`.
 *
 * @param node - Node to inspect.
 * @returns `true` when the node is a blockquote opening with `[!type]`,
 *   `false` in every other case.
 */
function isObsidianCallout(node) {
    if (node.type === 'blockquote' && node.children?.length) {
        const opener = node.children[0];
        if (opener.type === 'paragraph' && opener.children?.length) {
            const lead = opener.children[0];
            if (lead.type === 'text' && typeof lead.value === 'string') {
                // Marker must sit at the very start of the text: "[!" + word chars/hyphens + "]".
                return /^\[![\w-]+\]/.test(lead.value);
            }
        }
    }
    return false;
}
48
/**
 * Compute the end offset of the frontmatter zone for a `yaml` node.
 *
 * mdast-util-frontmatter positions the `yaml` node over the whole block,
 * opening and closing `---` fences included. Searching for `---` starting at
 * the node's end would therefore find the next unrelated `---` in the body
 * (thematic break, table delimiter row, ...) and over-extend the zone.
 * The forward search is kept only as a fallback for a node whose span does
 * not already end on a fence.
 *
 * @param content - Full markdown source the tree was parsed from.
 * @param off - `{ start, end }` offsets of the `yaml` node.
 * @returns Offset just past the closing fence, plus one trailing `\n` if present.
 */
function frontmatterZoneEnd(content, off) {
    const nodeText = content.slice(off.start, off.end);
    // Longer than a lone fence and ending in `---` (optionally followed by
    // spaces/tabs): the closing fence is already inside the node's span.
    const spansClosingFence = nodeText.length > 3 && /---[ \t]*$/.test(nodeText);
    let fenceEnd = off.end;
    if (!spansClosingFence) {
        const closingIdx = content.indexOf('---', off.end);
        if (closingIdx !== -1) {
            fenceEnd = closingIdx + 3;
        }
    }
    // Swallow the newline that terminates the fence line, if there is one.
    return content[fenceEnd] === '\n' ? fenceEnd + 1 : fenceEnd;
}
/**
 * Extract protected zones from an mdast tree.
 *
 * Walks every node that carries numeric position offsets and records a zone
 * for frontmatter, code, inline code, links, headings, HTML, math, tables and
 * Obsidian callouts. Regex-detected zones (wikilinks, bare URLs, hashtags,
 * `%%comments%%`) are then appended, and the combined list is sorted by
 * `start`. Zones may overlap or nest; they are not merged.
 *
 * @param tree - Parsed mdast root for `content`.
 * @param content - The exact source string `tree` was parsed from; node
 *   offsets are interpreted as indices into this string.
 * @returns Array of `{ start, end, type }` zones ordered by ascending `start`.
 */
export function getProtectedZonesFromAst(tree, content) {
    const zones = [];
    visit(tree, (node) => {
        const off = getOffset(node);
        // Nodes without numeric offsets cannot be mapped back onto `content`.
        if (!off)
            return;
        switch (node.type) {
            case 'yaml':
                // Frontmatter zone always starts at the top of the document and
                // runs through the closing fence and its trailing newline.
                zones.push(zone(0, frontmatterZoneEnd(content, off), 'frontmatter'));
                break;
            case 'code':
                zones.push(zone(off.start, off.end, 'code_block'));
                break;
            case 'inlineCode':
                zones.push(zone(off.start, off.end, 'inline_code'));
                break;
            case 'link':
                zones.push(zone(off.start, off.end, 'markdown_link'));
                break;
            case 'heading':
                zones.push(zone(off.start, off.end, 'header'));
                break;
            case 'html':
                zones.push(zone(off.start, off.end, 'html_tag'));
                break;
            case 'inlineMath':
            case 'math':
                zones.push(zone(off.start, off.end, 'math'));
                break;
            case 'table':
                zones.push(zone(off.start, off.end, 'table'));
                break;
            case 'blockquote':
                if (isObsidianCallout(node)) {
                    zones.push(zone(off.start, off.end, 'obsidian_callout'));
                    // The whole callout is protected, so its children add nothing.
                    return SKIP;
                }
                break;
        }
    });
    // Supplemental regex for Obsidian-specific syntax not in mdast
    addRegexZones(content, zones);
    // Sort by start position
    zones.sort((a, b) => a.start - b.start);
    return zones;
}
104
/**
 * Append zones for Obsidian-flavoured syntax that the mdast tree does not
 * represent, found by scanning the raw text:
 * - `[[wikilinks]]`
 * - bare `http://` / `https://` URLs
 * - `#hashtags`
 * - `%%comments%%` (may span lines)
 *
 * Each rule is scanned over the whole string in the order listed above and
 * every match is pushed as `{ start, end, type }`.
 *
 * @param content - Raw markdown text to scan.
 * @param zones - Array that receives the new zones; it is mutated in place.
 */
function addRegexZones(content, zones) {
    // Regex literals are created per call, so each global scan starts at index 0.
    const rules = [
        { type: 'wikilink', pattern: /\[\[[^\]]+\]\]/g },
        { type: 'url', pattern: /https?:\/\/[^\s\)\]]+(?:\([^\)]+\))?[^\s\)\]]*/g },
        { type: 'hashtag', pattern: /#[\w-]+/g },
        { type: 'obsidian_comment', pattern: /%%.*?%%/gs },
    ];
    rules.forEach(({ type, pattern }) => {
        for (let hit = pattern.exec(content); hit !== null; hit = pattern.exec(content)) {
            const begin = hit.index;
            zones.push({ start: begin, end: begin + hit[0].length, type });
        }
    });
}
129
+ //# sourceMappingURL=astProtectedZones.js.map
package/dist/index.d.ts CHANGED
@@ -7,7 +7,9 @@
7
7
  export type { EntityIndex, EntityCategory, EntityWithAliases, Entity, EntityWithType, ScanOptions, WikilinkOptions, WikilinkResult, ImplicitEntityConfig, ExtendedWikilinkOptions, ImplicitEntityMatch, ResolveAliasOptions, ProtectedZone, ProtectedZoneType, } from './types.js';
8
8
  export { scanVaultEntities, getAllEntities, getAllEntitiesWithTypes, getEntityName, getEntityAliases, loadEntityCache, saveEntityCache, ENTITY_CACHE_VERSION, } from './entities.js';
9
9
  export { applyWikilinks, processWikilinks, resolveAliasWikilinks, suggestWikilinks, detectImplicitEntities, findEntityMatches, IMPLICIT_EXCLUDE_WORDS, } from './wikilinks.js';
10
- export { getProtectedZones, isInProtectedZone, rangeOverlapsProtectedZone, } from './protectedZones.js';
10
+ export { getProtectedZones, getProtectedZonesRegex, isInProtectedZone, rangeOverlapsProtectedZone, } from './protectedZones.js';
11
+ export { parseMarkdown } from './parseMarkdown.js';
12
+ export { getProtectedZonesFromAst } from './astProtectedZones.js';
11
13
  export { OperationLogger, createLoggerFromConfig, generateSessionId, getSessionId, setSessionId, } from './logging/index.js';
12
14
  export type { OperationLogEntry, SessionMetrics, AggregatedMetrics, LoggingConfig, ProductId, } from './logging/index.js';
13
15
  export { openStateDb, deleteStateDb, stateDbExists, searchEntities, searchEntitiesPrefix, getEntityByName, getEntitiesByAlias, getAllEntitiesFromDb, getEntityIndexFromDb, recordEntityMention, getEntityRecency, getAllRecency, setWriteState, getWriteState, deleteWriteState, setFlywheelConfig, getFlywheelConfig, getAllFlywheelConfig, saveFlywheelConfigToDb, loadFlywheelConfigFromDb, getStateDbMetadata, recordMergeDismissal, getDismissedMergePairs, saveVaultIndexCache, loadVaultIndexCache, getVaultIndexCacheInfo, rebuildEntitiesFts, loadContentHashes, saveContentHashBatch, renameContentHash, SCHEMA_VERSION, STATE_DB_FILENAME, FLYWHEEL_DIR, } from './sqlite.js';
package/dist/index.js CHANGED
@@ -9,7 +9,10 @@ export { scanVaultEntities, getAllEntities, getAllEntitiesWithTypes, getEntityNa
9
9
  // Wikilinks
10
10
  export { applyWikilinks, processWikilinks, resolveAliasWikilinks, suggestWikilinks, detectImplicitEntities, findEntityMatches, IMPLICIT_EXCLUDE_WORDS, } from './wikilinks.js';
11
11
  // Protected zones
12
- export { getProtectedZones, isInProtectedZone, rangeOverlapsProtectedZone, } from './protectedZones.js';
12
+ export { getProtectedZones, getProtectedZonesRegex, isInProtectedZone, rangeOverlapsProtectedZone, } from './protectedZones.js';
13
+ // AST parsing
14
+ export { parseMarkdown } from './parseMarkdown.js';
15
+ export { getProtectedZonesFromAst } from './astProtectedZones.js';
13
16
  // Logging (unified cross-product logging)
14
17
  export { OperationLogger, createLoggerFromConfig, generateSessionId, getSessionId, setSessionId, } from './logging/index.js';
15
18
  // SQLite State Database
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Markdown AST parser for protected zone detection
3
+ *
4
+ * Uses mdast (fromMarkdown) with GFM, frontmatter, and math extensions.
5
+ * Returns null on parse error to trigger regex fallback.
6
+ */
7
+ import type { Root } from 'mdast';
8
+ export interface ParseMarkdownOptions {
9
+ /** Skip AST parsing entirely, return null */
10
+ forceRegex?: boolean;
11
+ }
12
+ /**
13
+ * Parse markdown content into an AST tree.
14
+ * Returns null on parse error (triggers regex fallback).
15
+ */
16
+ export declare function parseMarkdown(content: string, options?: ParseMarkdownOptions): Root | null;
17
+ //# sourceMappingURL=parseMarkdown.d.ts.map
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Markdown AST parser for protected zone detection
3
+ *
4
+ * Uses mdast (fromMarkdown) with GFM, frontmatter, and math extensions.
5
+ * Returns null on parse error to trigger regex fallback.
6
+ */
7
+ import { fromMarkdown } from 'mdast-util-from-markdown';
8
+ import { gfmFromMarkdown } from 'mdast-util-gfm';
9
+ import { frontmatterFromMarkdown } from 'mdast-util-frontmatter';
10
+ import { mathFromMarkdown } from 'mdast-util-math';
11
+ import { gfm } from 'micromark-extension-gfm';
12
+ import { frontmatter } from 'micromark-extension-frontmatter';
13
+ import { math } from 'micromark-extension-math';
14
/**
 * Turn markdown text into an mdast tree with the GFM, YAML-frontmatter and
 * math extensions enabled.
 *
 * @param content - Markdown source to parse.
 * @param options - Optional settings; a truthy `forceRegex` skips parsing.
 * @returns The parsed tree, or `null` when `forceRegex` is set or the parser
 *   throws. Callers use `null` as the signal to fall back to regex detection.
 */
export function parseMarkdown(content, options) {
    const skipAst = options?.forceRegex;
    if (!skipAst) {
        try {
            const syntaxExtensions = [gfm(), frontmatter(['yaml']), math()];
            const treeExtensions = [gfmFromMarkdown(), frontmatterFromMarkdown(['yaml']), mathFromMarkdown()];
            return fromMarkdown(content, {
                extensions: syntaxExtensions,
                mdastExtensions: treeExtensions,
            });
        }
        catch {
            // Deliberate best-effort: any parse failure falls through to `null`.
        }
    }
    return null;
}
32
+ //# sourceMappingURL=parseMarkdown.js.map
@@ -1,6 +1,9 @@
1
1
  /**
2
2
  * Protected zones detection for wikilink application
3
3
  *
4
+ * AST-first approach: uses mdast for accurate zone detection (nested callouts,
5
+ * tables, multi-line HTML), with regex fallback on parse failure.
6
+ *
4
7
  * These are areas in markdown content where wikilinks should NOT be applied:
5
8
  * - YAML frontmatter
6
9
  * - Code blocks (``` ... ```)
@@ -13,7 +16,8 @@
13
16
  * - Obsidian comments (%% ... %%)
14
17
  * - Math expressions ($ ... $ and $$ ... $$)
15
18
  * - Markdown headers (# to ###### at line start)
16
- * - Obsidian callouts (> [!type] syntax)
19
+ * - Obsidian callouts (> [!type] syntax — entire block, including nested)
20
+ * - Tables (GFM pipe tables)
17
21
  */
18
22
  import type { ProtectedZone } from './types.js';
19
23
  /**
@@ -22,7 +26,15 @@ import type { ProtectedZone } from './types.js';
22
26
  */
23
27
  export declare function findFrontmatterEnd(content: string): number;
24
28
  /**
25
- * Get all protected zones in content where wikilinks should not be applied
29
+ * Get all protected zones using regex-only detection (legacy/fallback).
30
+ * Exported for testing and explicit fallback use.
31
+ */
32
+ export declare function getProtectedZonesRegex(content: string): ProtectedZone[];
33
+ /**
34
+ * Get all protected zones in content where wikilinks should not be applied.
35
+ *
36
+ * AST-first: parses markdown into AST for accurate detection of nested
37
+ * callouts, tables, and HTML comments. Falls back to regex on parse failure.
26
38
  */
27
39
  export declare function getProtectedZones(content: string): ProtectedZone[];
28
40
  /**
@@ -1,6 +1,9 @@
1
1
  /**
2
2
  * Protected zones detection for wikilink application
3
3
  *
4
+ * AST-first approach: uses mdast for accurate zone detection (nested callouts,
5
+ * tables, multi-line HTML), with regex fallback on parse failure.
6
+ *
4
7
  * These are areas in markdown content where wikilinks should NOT be applied:
5
8
  * - YAML frontmatter
6
9
  * - Code blocks (``` ... ```)
@@ -13,8 +16,11 @@
13
16
  * - Obsidian comments (%% ... %%)
14
17
  * - Math expressions ($ ... $ and $$ ... $$)
15
18
  * - Markdown headers (# to ###### at line start)
16
- * - Obsidian callouts (> [!type] syntax)
19
+ * - Obsidian callouts (> [!type] syntax — entire block, including nested)
20
+ * - Tables (GFM pipe tables)
17
21
  */
22
+ import { parseMarkdown } from './parseMarkdown.js';
23
+ import { getProtectedZonesFromAst } from './astProtectedZones.js';
18
24
  /**
19
25
  * Find where YAML frontmatter ends
20
26
  * @returns Character index after closing ---, or 0 if no frontmatter
@@ -58,9 +64,10 @@ function findPatternZones(content, pattern, type) {
58
64
  return zones;
59
65
  }
60
66
  /**
61
- * Get all protected zones in content where wikilinks should not be applied
67
+ * Get all protected zones using regex-only detection (legacy/fallback).
68
+ * Exported for testing and explicit fallback use.
62
69
  */
63
- export function getProtectedZones(content) {
70
+ export function getProtectedZonesRegex(content) {
64
71
  const zones = [];
65
72
  // 1. YAML frontmatter (must be first)
66
73
  const frontmatterEnd = findFrontmatterEnd(content);
@@ -97,6 +104,19 @@ export function getProtectedZones(content) {
97
104
  zones.sort((a, b) => a.start - b.start);
98
105
  return zones;
99
106
  }
107
/**
 * Get all protected zones in content where wikilinks should not be applied.
 *
 * The content is parsed with `parseMarkdown` first. When that yields a tree,
 * zones come from `getProtectedZonesFromAst`; when it yields `null`, zones
 * come from the regex-only `getProtectedZonesRegex`.
 *
 * @param content - Markdown source to analyse.
 * @returns The protected zones produced by whichever detector ran.
 */
export function getProtectedZones(content) {
    const ast = parseMarkdown(content);
    return ast
        ? getProtectedZonesFromAst(ast, content)
        : getProtectedZonesRegex(content);
}
100
120
  /**
101
121
  * Check if a position is within any protected zone
102
122
  */
package/dist/types.d.ts CHANGED
@@ -75,7 +75,7 @@ export interface ProtectedZone {
75
75
  /**
76
76
  * Types of protected zones
77
77
  */
78
- export type ProtectedZoneType = 'frontmatter' | 'code_block' | 'inline_code' | 'wikilink' | 'markdown_link' | 'url' | 'hashtag' | 'html_tag' | 'obsidian_comment' | 'math' | 'header' | 'obsidian_callout';
78
+ export type ProtectedZoneType = 'frontmatter' | 'code_block' | 'inline_code' | 'wikilink' | 'markdown_link' | 'url' | 'hashtag' | 'html_tag' | 'obsidian_comment' | 'math' | 'header' | 'obsidian_callout' | 'table';
79
79
  /**
80
80
  * Options for entity scanning
81
81
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@velvetmonkey/vault-core",
3
- "version": "2.0.89",
3
+ "version": "2.0.91",
4
4
  "description": "Shared vault utilities for Flywheel ecosystem (entity scanning, wikilinks, protected zones)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -36,7 +36,15 @@
36
36
  "prepublishOnly": "npm run build"
37
37
  },
38
38
  "dependencies": {
39
- "better-sqlite3": "^11.0.0"
39
+ "better-sqlite3": "^11.0.0",
40
+ "mdast-util-from-markdown": "^2.0.0",
41
+ "mdast-util-gfm": "^3.0.0",
42
+ "mdast-util-frontmatter": "^2.0.0",
43
+ "mdast-util-math": "^3.0.0",
44
+ "micromark-extension-gfm": "^3.0.0",
45
+ "micromark-extension-frontmatter": "^2.0.0",
46
+ "micromark-extension-math": "^3.0.0",
47
+ "unist-util-visit": "^5.0.0"
40
48
  },
41
49
  "devDependencies": {
42
50
  "@types/better-sqlite3": "^7.6.8",