npm - nx-json-parser - Versions diffs - 1.0.0 → 1.1.0 - Mend

nx-json-parser 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/dist/index.d.ts +1 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1 -0
package/dist/index.js.map +1 -1
package/dist/parser.d.ts +22 -1
package/dist/parser.d.ts.map +1 -1
package/dist/parser.js +191 -13
package/dist/parser.js.map +1 -1
package/dist/plugins/bullet-sections.d.ts.map +1 -1
package/dist/plugins/bullet-sections.js +42 -72
package/dist/plugins/bullet-sections.js.map +1 -1
package/dist/plugins/detect-bullet-mode.d.ts +7 -0
package/dist/plugins/detect-bullet-mode.d.ts.map +1 -0
package/dist/plugins/detect-bullet-mode.js +126 -0
package/dist/plugins/detect-bullet-mode.js.map +1 -0
package/dist/remark-markdown-parser.d.ts +41 -0
package/dist/remark-markdown-parser.d.ts.map +1 -0
package/dist/remark-markdown-parser.js +294 -0
package/dist/remark-markdown-parser.js.map +1 -0
package/dist/types.d.ts +17 -3
package/dist/types.d.ts.map +1 -1
package/dist/types.js +6 -1
package/dist/types.js.map +1 -1
package/package.json +1 -1
package/src/index.ts +1 -0
package/src/parser.ts +213 -15
package/src/plugins/bullet-sections.ts +58 -83
package/src/plugins/bullet-sections.ts.old +86 -0
package/src/plugins/detect-bullet-mode.ts +161 -0
package/src/remark-markdown-parser.ts +340 -0
package/src/types.ts +22 -6
package/test/bullet-mode.test.ts +99 -0

package/src/remark-markdown-parser.ts ADDED Viewed

@@ -0,0 +1,340 @@
+/**
+ * Remark-based Markdown Parser
+ * Replaces regex-based parsing with AST-based parsing using unified/remark
+ */
+import { unified } from 'unified';
+import remarkParse from 'remark-parse';
+import remarkGfm from 'remark-gfm';
+import { visit } from 'unist-util-visit';
+import { toString } from 'mdast-util-to-string';
+import { toCamelCase } from 'nx-helpers';
+export interface MarkdownSection { // One section produced by RemarkMarkdownParser.parseSections()
+  heading: string; // Plain text of a heading node, or of the first child of a bullet in sections mode
+  content: string; // Stringified section body; sectionsToObject() passes it to parseContent()
+  level: number; // Heading depth taken from the AST; sections built from list items use 1
+}
+export enum BulletMode { // Selects how parseSections() treats bullet lists
+  ARRAY = 'array', // Heading-based sectioning only; list items are not turned into sections
+  SECTIONS = 'sections', // Every item of a root-level list becomes its own section
+  AUTO = 'auto' // Choose ARRAY or SECTIONS per document via the detectBulletMode() scoring
+}
+export interface RemarkParserOptions { // Constructor options for RemarkMarkdownParser; every field is optional
+  bulletMode?: BulletMode; // Defaults to BulletMode.AUTO
+  sectionKeywords?: string[]; // Substrings looked up in lower-cased bullet text during auto-detection; defaults to a built-in list
+  debug?: boolean; // When true, the chosen mode and detection scores are written with console.log; defaults to false
+}
+/**
+ * Markdown parser built on unified/remark.
+ *
+ * Splits a markdown document into a flat list of {@link MarkdownSection}s and
+ * converts section bodies into strings, string arrays (lists) or arrays of
+ * row objects (GFM tables).
+ */
+export class RemarkMarkdownParser {
+  // unified processor with remark-parse + remark-gfm; only `.parse()` is called on it
+  private processor: any;
+  private options: Required<RemarkParserOptions>;
+  /**
+   * @param options Optional settings. Missing fields fall back to
+   *   `BulletMode.AUTO`, the built-in keyword list and `debug: false`.
+   */
+  constructor(options: RemarkParserOptions = {}) {
+    this.processor = unified()
+      .use(remarkParse)
+      .use(remarkGfm);
+    const keywords = options.sectionKeywords ?? this.getDefaultKeywords();
+    this.options = {
+      bulletMode: options.bulletMode ?? BulletMode.AUTO,
+      // Bullet text is lower-cased before matching, so keywords are normalised the
+      // same way; empty keywords are dropped because they would match every bullet.
+      sectionKeywords: keywords
+        .map(keyword => keyword.toLowerCase())
+        .filter(keyword => keyword.length > 0),
+      debug: options.debug ?? false
+    };
+  }
+  /**
+   * Parse markdown into sections.
+   *
+   * In `SECTIONS` mode every item of a root-level list becomes a section next
+   * to the regular headings; otherwise only headings open sections.
+   *
+   * @param markdown Raw markdown source.
+   * @returns Sections in document order.
+   */
+  parseSections(markdown: string): MarkdownSection[] {
+    const tree = this.processor.parse(markdown);
+    // Resolve AUTO to a concrete mode for this document
+    const bulletMode = this.options.bulletMode === BulletMode.AUTO
+      ? this.detectBulletMode(tree)
+      : this.options.bulletMode;
+    if (this.options.debug) {
+      console.log(`📋 Bullet mode: ${bulletMode}`);
+    }
+    return bulletMode === BulletMode.SECTIONS
+      ? this.parseSectionsMode(tree)
+      : this.parseHeadingMode(tree);
+  }
+  /**
+   * Parse a section body (tables, lists, text).
+   *
+   * @param content Markdown fragment.
+   * @returns `''` for blank input; an array of row objects when the fragment
+   *   contains a table; a `string[]` when it contains a list (whichever of the
+   *   two comes first wins and the surrounding text is discarded); otherwise
+   *   the plain text of the fragment.
+   */
+  parseContent(content: string): any {
+    const trimmed = content.trim();
+    if (!trimmed) return '';
+    const tree = this.processor.parse(trimmed);
+    const children: any[] = tree.children;
+    // The first table or list dominates, even when paragraphs surround it
+    const structured = children.find(
+      (child: any) => child.type === 'table' || child.type === 'list'
+    );
+    if (structured) {
+      return structured.type === 'table'
+        ? this.parseTable(structured)
+        : this.parseList(structured);
+    }
+    // A lone paragraph is returned as-is
+    if (children.length === 1 && children[0].type === 'paragraph') {
+      return toString(children[0]);
+    }
+    // Anything else is flattened to text, one blank line between blocks
+    const text = children.map((child: any) => toString(child)).join('\n\n').trim();
+    return text || trimmed;
+  }
+  /**
+   * Convert sections to an object keyed by the camelCased heading.
+   *
+   * Sections whose headings map to the same key overwrite each other; the
+   * last one wins.
+   *
+   * @param sections Sections as returned by {@link parseSections}.
+   * @returns Map from camelCased heading to parsed content.
+   */
+  sectionsToObject(sections: MarkdownSection[]): Record<string, any> {
+    const result: Record<string, any> = {};
+    for (const section of sections) {
+      const key = toCamelCase(section.heading);
+      result[key] = this.parseContent(section.content);
+    }
+    return result;
+  }
+  // ========================================================================
+  // PRIVATE METHODS - Mode Detection
+  // ========================================================================
+  /**
+   * Score the root-level lists of `tree` and decide whether their bullets
+   * look like section titles or like plain array items.
+   *
+   * @param tree Root node produced by the processor.
+   * @returns `SECTIONS` only when the section score is strictly higher than
+   *   the array score; `ARRAY` otherwise, including when there are no bullets.
+   */
+  private detectBulletMode(tree: any): BulletMode {
+    let bulletCount = 0;
+    let bulletsWithContent = 0;
+    let bulletsWithColons = 0;
+    let bulletsWithNestedLists = 0;
+    let bulletsWithKeywords = 0;
+    let totalLength = 0;
+    visit(tree, 'list', (listNode: any, _index?: number, parent?: any) => {
+      // Only analyze root-level lists
+      if (!parent || parent.type !== 'root') return;
+      for (const listItem of listNode.children) {
+        bulletCount++;
+        const firstChild = listItem.children[0];
+        const text = firstChild ? toString(firstChild) : '';
+        totalLength += text.length;
+        // Check indicators
+        if (text.includes(':')) bulletsWithColons++;
+        if (listItem.children.length > 1) bulletsWithContent++;
+        const hasNestedList = listItem.children.some((c: any) => c.type === 'list');
+        if (hasNestedList) bulletsWithNestedLists++;
+        const lowerText = text.toLowerCase();
+        if (this.options.sectionKeywords.some(kw => lowerText.includes(kw))) {
+          bulletsWithKeywords++;
+        }
+      }
+    });
+    if (bulletCount === 0) return BulletMode.ARRAY;
+    const avgLength = totalLength / bulletCount;
+    // Scoring: each indicator counts once, no matter how many bullets show it
+    let sectionScore = 0;
+    let arrayScore = 0;
+    if (bulletsWithColons > 0) sectionScore += 3;
+    if (bulletsWithContent > 0) sectionScore += 3;
+    if (bulletsWithNestedLists > 0) sectionScore += 2;
+    if (bulletsWithKeywords > 0) sectionScore += 2;
+    if (avgLength > 30) sectionScore += 1;
+    if (bulletsWithContent === 0 && bulletsWithColons === 0) arrayScore += 4;
+    if (avgLength < 30) arrayScore += 2;
+    if (bulletCount >= 3 && bulletsWithContent === 0) arrayScore += 2;
+    if (this.options.debug) {
+      console.log(`🔍 Detection scores - Sections: ${sectionScore}, Array: ${arrayScore}`);
+    }
+    return sectionScore > arrayScore ? BulletMode.SECTIONS : BulletMode.ARRAY;
+  }
+  // ========================================================================
+  // PRIVATE METHODS - Parsing Modes
+  // ========================================================================
+  /**
+   * Heading-driven sectioning: every heading opens a section and all
+   * following root nodes up to the next heading form its content.
+   *
+   * Nodes that appear before the first heading are not attached to any
+   * section, so a document without headings yields an empty array.
+   */
+  private parseHeadingMode(tree: any): MarkdownSection[] {
+    const sections: MarkdownSection[] = [];
+    let currentSection: MarkdownSection | null = null;
+    let currentContent: string[] = [];
+    // Close the section being built, if any, and append it to the result
+    const flush = (): void => {
+      if (!currentSection) return;
+      currentSection.content = currentContent.join('\n\n').trim();
+      sections.push(currentSection);
+    };
+    for (const node of tree.children) {
+      if (node.type === 'heading') {
+        flush();
+        currentSection = {
+          heading: toString(node),
+          content: '',
+          level: node.depth
+        };
+        currentContent = [];
+      } else if (currentSection) {
+        currentContent.push(this.nodeToString(node));
+      }
+    }
+    flush();
+    return sections;
+  }
+  /**
+   * Bullet-driven sectioning: headings open sections as usual and every item
+   * of a root-level list becomes a section of its own. Any other node is
+   * appended to the most recently opened section; nodes that precede the
+   * first section are dropped.
+   */
+  private parseSectionsMode(tree: any): MarkdownSection[] {
+    const sections: MarkdownSection[] = [];
+    for (const node of tree.children) {
+      if (node.type === 'heading') {
+        // Content starts empty and is filled by the nodes that follow
+        sections.push({
+          heading: toString(node),
+          content: '',
+          level: node.depth
+        });
+      } else if (node.type === 'list') {
+        // Process each list item as a section
+        for (const listItem of node.children) {
+          const section = this.listItemToSection(listItem);
+          if (section) {
+            sections.push(section);
+          }
+        }
+      } else if (sections.length > 0) {
+        // Add content to last section
+        const lastSection = sections[sections.length - 1];
+        const nodeContent = this.nodeToString(node);
+        lastSection.content = lastSection.content
+          ? `${lastSection.content}\n\n${nodeContent}`
+          : nodeContent;
+      }
+    }
+    return sections;
+  }
+  /**
+   * Turn one list item into a level-1 section.
+   *
+   * The heading is the first line of the item's first child with one
+   * trailing colon removed. When that child is a paragraph spanning several
+   * lines (a bullet whose body follows on the next line without a blank line
+   * in between), the remaining lines become the start of the content instead
+   * of being glued onto the heading.
+   *
+   * @returns The section, or `null` for an item without children.
+   */
+  private listItemToSection(listItem: any): MarkdownSection | null {
+    if (!listItem.children || listItem.children.length === 0) {
+      return null;
+    }
+    const [firstChild, ...contentNodes] = listItem.children;
+    let headingText: string = toString(firstChild);
+    const contentParts: string[] = contentNodes.map((node: any) => this.nodeToString(node));
+    if (firstChild.type === 'paragraph') {
+      // Soft line breaks stay inside the paragraph text; split them off here
+      const [firstLine = '', ...restLines] = headingText.split(/\r\n|\r|\n/);
+      const inlineBody = restLines.join('\n').trim();
+      headingText = firstLine;
+      if (inlineBody) contentParts.unshift(inlineBody);
+    }
+    const heading = headingText.trim().replace(/:$/, ''); // Remove trailing colon
+    const content = contentParts.join('\n\n').trim();
+    return {
+      heading,
+      content,
+      level: 1
+    };
+  }
+  // ========================================================================
+  // PRIVATE METHODS - Content Parsing
+  // ========================================================================
+  /**
+   * Convert a table node into one object per data row, keyed by the
+   * camelCased header cells. Cells beyond the header width, or under an
+   * empty header, are stored under `column<index>`.
+   */
+  private parseTable(tableNode: any): any[] {
+    const rows = tableNode.children;
+    if (rows.length === 0) return [];
+    // First row = headers
+    const headerRow = rows[0];
+    const headers = headerRow.children.map((cell: any) =>
+      toCamelCase(toString(cell).trim())
+    );
+    // Data rows
+    return rows.slice(1).map((row: any) => {
+      const obj: any = {};
+      row.children.forEach((cell: any, i: number) => {
+        const key = headers[i] || `column${i}`;
+        obj[key] = toString(cell).trim();
+      });
+      return obj;
+    });
+  }
+  /**
+   * Convert a list node into the trimmed text of each item's first child.
+   * Nested content (further paragraphs, sub-lists) is ignored.
+   */
+  private parseList(listNode: any): string[] {
+    return listNode.children.map((item: any) => {
+      // Get first child only (ignore nested content for simple lists)
+      const firstChild = item.children[0];
+      return toString(firstChild).trim();
+    });
+  }
+  /**
+   * Serialise a block node for storage in `MarkdownSection.content`:
+   * tables become a JSON string of their row objects, lists become `- item`
+   * lines, everything else becomes its plain text.
+   */
+  private nodeToString(node: any): string {
+    if (node.type === 'table') {
+      return JSON.stringify(this.parseTable(node));
+    }
+    if (node.type === 'list') {
+      return this.parseList(node).map(item => `- ${item}`).join('\n');
+    }
+    return toString(node);
+  }
+  /** Built-in lower-case keywords that hint a bullet is a section title. */
+  private getDefaultKeywords(): string[] {
+    return [
+      'answer', 'summary', 'introduction', 'conclusion', 'overview',
+      'assumptions', 'unknowns', 'evidence', 'notes', 'details',
+      'description', 'background', 'analysis', 'findings', 'recommendations',
+      'data', 'identity', 'network', 'security', 'monitoring', 'governance',
+      'availability', 'backup', 'patch', 'operational', 'provider'
+    ];
+  }
+}

package/src/types.ts CHANGED Viewed

@@ -1,13 +1,29 @@
+export enum BulletMode { // NOTE(review): same members as the BulletMode enum declared in remark-markdown-parser.ts; two separate enum declarations are distinct types, consider keeping one
+  ARRAY = 'array',
+  SECTIONS = 'sections',
+  AUTO = 'auto'
+}
 export interface MarkdownSection {
   heading: string;
-  content: any; // Can be string, array of objects (for tables), etc.
+  content: any; // Can be a string, an array of objects (for tables), etc.
   level: number;
-  format: 'heading' | 'list' | 'table' | 'text';
+  format?: 'heading' | 'bullet' | 'text'; // Optional since 1.1.0; the 1.0.0 values 'list' and 'table' are replaced by 'bullet'
 }
-export type ParseResult = Record<string, any>;
+export interface ParserOptions { // NOTE(review): presumably the options object accepted by RemarkParser in parser.ts — confirm
+  bulletMode?: BulletMode; // Optional bullet handling mode
+  debug?: boolean; // Optional debug flag; its consumer is not visible in this hunk
+}
-export interface OutputFormatSpec {
-  // Define if the user wants to enforce a specific schema later
-  [key: string]: any;
+export interface BulletModeResult { // NOTE(review): presumably produced by plugins/detect-bullet-mode — confirm
+  mode: 'array' | 'sections'; // A resolved mode; 'auto' cannot be expressed here
+  confidence: number; // Scale is not shown in this hunk — TODO confirm range
+  reasons: string[]; // Presumably human-readable explanations for the decision — confirm
 }
+// Parse output: an open-ended, string-keyed object with an optional list of sections.
+export interface ParseResult {
+  sections?: MarkdownSection[]; // Optional list of sections
+  [key: string]: any;  // Allow any string keys with any values
+}

package/test/bullet-mode.test.ts ADDED Viewed

@@ -0,0 +1,99 @@
+import { markdownToJson } from '../src/index.js';
+import { RemarkParser } from '../src/parser.js';
+import { BulletMode } from '../src/types.js';
+// Covers automatic ARRAY/SECTIONS detection and both manual overrides.
+describe('Bullet Mode Detection', () => {
+    it('should auto-detect simple arrays', () => {
+        const md = `
+- Item 1
+- Item 2
+- Item 3
+    `;
+        const result = markdownToJson(md);
+        // There is no heading, so the list is expected under the camelCased key
+        // of the implicit 'Root' section, as a plain string array.
+        expect(result.root).toEqual(['Item 1', 'Item 2', 'Item 3']);
+    });
+    it('should auto-detect sections', () => {
+        const md = `
+- Short Answer
+  The sky is blue.
+- Evidence
+  1. Look up.
+  2. See blue.
+    `;
+        const result = markdownToJson(md);
+        // Each bullet becomes its own key.
+        expect(result.shortAnswer).toBe('The sky is blue.');
+        // 'Evidence' holds nothing but a nested ordered list, which is expected
+        // to surface as an array of its items.
+        expect(result.evidence).toBeDefined();
+        expect(Array.isArray(result.evidence)).toBe(true);
+        expect(result.evidence[0]).toContain('Look up');
+    });
+    it('should respect manual override to ARRAY', () => {
+        // Input that LOOKS like sections (colon-terminated titles with bodies),
+        // parsed with ARRAY forced.
+        const mdSec = `
+- Section A:
+  Content A
+- Section B:
+  Content B
+    `;
+        // markdownToJson does not expose options, so the parser class is used directly.
+        const parser = new RemarkParser({ bulletMode: BulletMode.ARRAY });
+        const sections = parser.parse(mdSec);
+        // In ARRAY mode the list stays a single 'Root' section whose content is
+        // the flattened array of bullet texts.
+        expect(sections.length).toBe(1);
+        expect(sections[0]?.heading).toBe('Root');
+        expect(Array.isArray(sections[0]?.content)).toBe(true);
+        expect(sections[0]?.content[0]).toContain('Section A'); // Just the text
+    });
+    it('should respect manual override to SECTIONS', () => {
+        // Input that LOOKS like a plain array, parsed with SECTIONS forced.
+        const md = `
+- Item 1
+- Item 2
+    `;
+        const parser = new RemarkParser({ bulletMode: BulletMode.SECTIONS });
+        const sections = parser.parse(md);
+        // Each bullet becomes a heading with empty content.
+        expect(sections.length).toBe(2);
+        expect(sections[0]?.heading).toBe('Item 1');
+        expect(sections[1]?.heading).toBe('Item 2');
+    });
+});