nx-json-parser 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/parser.ts CHANGED
@@ -2,18 +2,42 @@ import { unified } from 'unified';
2
2
  import remarkParse from 'remark-parse';
3
3
  import remarkGfm from 'remark-gfm';
4
4
  import { toString } from 'mdast-util-to-string';
5
- import { MarkdownSection } from './types.js';
6
- import { remarkBulletSections } from './plugins/bullet-sections.js';
5
+ import { MarkdownSection, BulletMode, ParserOptions } from './types.js';
6
+ import { detectBulletMode } from './plugins/detect-bullet-mode.js';
7
7
  import { toCamelCase } from 'nx-helpers';
8
8
 
9
9
  export class RemarkParser {
10
10
  private processor = unified()
11
11
  .use(remarkParse)
12
- .use(remarkGfm)
13
- .use(remarkBulletSections);
12
+ .use(remarkGfm);
13
+
14
+ private options: Required<ParserOptions>;
15
+
16
/**
 * Create a parser, filling in any option the caller left out.
 *
 * @param options - Optional overrides; a missing (or nullish) `bulletMode`
 *   falls back to `BulletMode.AUTO` and a missing `debug` to `false`.
 */
constructor(options: ParserOptions = {}) {
  const bulletMode = options.bulletMode ?? BulletMode.AUTO;
  const debug = options.debug ?? false;

  this.options = { bulletMode, debug };
}
14
22
 
15
23
  parse(markdown: string): MarkdownSection[] {
16
24
  const tree = this.processor.runSync(this.processor.parse(markdown));
25
+
26
+ // Detect bullet mode if AUTO
27
+ let bulletMode = this.options.bulletMode;
28
+ if (bulletMode === BulletMode.AUTO) {
29
+ const detection = detectBulletMode(tree);
30
+ bulletMode = detection.mode === 'sections' ? BulletMode.SECTIONS : BulletMode.ARRAY;
31
+
32
+ if (this.options.debug) {
33
+ console.log('🔍 Bullet Mode Detection:');
34
+ console.log(` Mode: ${detection.mode}`);
35
+ console.log(` Confidence: ${(detection.confidence * 100).toFixed(0)}%`);
36
+ console.log(` Reasons:`);
37
+ detection.reasons.forEach(r => console.log(` - ${r}`));
38
+ }
39
+ }
40
+
17
41
  const sections: MarkdownSection[] = [];
18
42
  let currentSection: MarkdownSection | null = null;
19
43
  let currentNodes: any[] = [];
@@ -22,9 +46,9 @@ export class RemarkParser {
22
46
 
23
47
  for (const node of rootChildren) {
24
48
  if (node.type === 'heading') {
49
+ // Standard heading section (### Section)
25
50
  if (currentSection) {
26
- currentSection.content = this.processContent(currentNodes);
27
- sections.push(currentSection);
51
+ this.mergeAndPushSection(sections, currentSection, currentNodes, bulletMode);
28
52
  }
29
53
 
30
54
  currentSection = {
@@ -34,18 +58,43 @@ export class RemarkParser {
34
58
  format: 'heading'
35
59
  };
36
60
  currentNodes = [];
61
+
62
+ } else if (node.type === 'list' && bulletMode === BulletMode.SECTIONS) {
63
+ // Bullet-style sections mode: each root-level bullet is a section
64
+ if (currentSection) {
65
+ this.mergeAndPushSection(sections, currentSection, currentNodes, bulletMode);
66
+ currentSection = null;
67
+ currentNodes = [];
68
+ }
69
+
70
+ // Process each list item as a section
71
+ const newSections: MarkdownSection[] = [];
72
+ for (const listItem of node.children) {
73
+ newSections.push(...this.processBulletAsSection(listItem));
74
+ }
75
+
76
+ if (newSections.length > 0) {
77
+ // Push all except the last one as complete sections
78
+ for (let i = 0; i < newSections.length - 1; i++) {
79
+ sections.push(newSections[i]!);
80
+ }
81
+ // Keep the last one as currentSection to capture subsequent loose content
82
+ currentSection = newSections[newSections.length - 1]!;
83
+ }
84
+
37
85
  } else {
38
86
  currentNodes.push(node);
39
87
  }
40
88
  }
41
89
 
90
+ // Save last section
42
91
  if (currentSection) {
43
- currentSection.content = this.processContent(currentNodes);
44
- sections.push(currentSection);
92
+ this.mergeAndPushSection(sections, currentSection, currentNodes, bulletMode);
45
93
  } else if (currentNodes.length > 0) {
94
+ // No sections found, put everything in root
46
95
  sections.push({
47
96
  heading: 'Root',
48
- content: this.processContent(currentNodes),
97
+ content: this.processContent(currentNodes, bulletMode),
49
98
  level: 0,
50
99
  format: 'text'
51
100
  });
@@ -54,31 +103,139 @@ export class RemarkParser {
54
103
  return sections;
55
104
  }
56
105
 
57
- private processContent(nodes: any[]): any {
106
/**
 * Convert the nodes collected for one section into that section's content.
 *
 * - no nodes            -> `''`
 * - exactly one table   -> array of row objects (see `tableToArray`)
 * - exactly one list    -> array of item strings (see `listToArray`)
 * - anything else       -> one string, each node rendered as text and the
 *                          pieces separated by a blank line
 *
 * @param nodes - mdast nodes that belong to the section, in document order.
 * @param bulletMode - Resolved bullet mode. A lone list is rendered the same
 *   way in every mode, so the value is currently not consulted; the parameter
 *   is kept so existing call sites stay valid.
 * @returns `''`, an array, or a string as described above.
 */
private processContent(nodes: any[], bulletMode: BulletMode): any {
  if (nodes.length === 0) return '';

  if (nodes.length === 1) {
    const only = nodes[0];
    if (only.type === 'table') return this.tableToArray(only);
    if (only.type === 'list') return this.listToArray(only);
  }

  // Mixed content is flattened to text. Every piece must already be a string
  // before the join: joining the raw table rows would print "[object Object]"
  // and joining a raw item array would silently comma-separate the items.
  return nodes
    .map((node) => {
      if (node.type === 'table') {
        return JSON.stringify(this.tableToArray(node));
      }
      if (node.type === 'list') {
        // One list item per line.
        return this.listToArray(node).join('\n');
      }
      return toString(node);
    })
    .join('\n\n')
    .trim();
}
75
139
 
140
/**
 * Turn one root-level list item into a section (SECTIONS mode).
 *
 * The first line of the item's first child becomes the heading (a single
 * trailing colon is removed). Everything else becomes the content:
 *
 * - any further lines of that first child, plus all non-list children, are
 *   joined with blank lines into the text part;
 * - the items of every nested list are collected into the items part.
 *
 * Content shape:
 * - no nested list                        -> the text (possibly `''`)
 * - only a single nested list and no text -> the array of item strings
 * - otherwise                             -> `{ text, items }`
 *
 * @param listItem - mdast `listItem` node.
 * @returns A one-element array, or an empty array when the item has no children.
 */
private processBulletAsSection(listItem: any): MarkdownSection[] {
  const firstChild = listItem.children[0];
  if (!firstChild) return [];

  const lines = toString(firstChild).trim().split('\n');
  const heading = (lines[0]?.trim() || '').replace(/:$/, '');
  // Text that follows the heading inside the same paragraph node.
  const sameNodeContent = lines.slice(1).join('\n').trim();

  const contentNodes: any[] = listItem.children.slice(1);
  const listNodes = contentNodes.filter((node: any) => node.type === 'list');
  const trailingText = contentNodes
    .filter((node: any) => node.type !== 'list')
    .map((node: any) => toString(node))
    .join('\n\n')
    .trim();

  // Join only the non-empty parts so no dangling separator is produced.
  const text = [sameNodeContent, trailingText]
    .filter((part) => part.length > 0)
    .join('\n\n');

  let content: any;
  if (listNodes.length === 0) {
    content = text;
  } else {
    // Keep the items of every nested list, not just the first one.
    const items: string[] = [];
    for (const list of listNodes) {
      items.push(...this.listToArray(list));
    }

    // A bare array is only used when nothing else would be lost; as soon as
    // there is accompanying text it is carried alongside the items.
    content = contentNodes.length === 1 && text === '' ? items : { text, items };
  }

  return [
    {
      heading,
      content,
      level: 1,
      format: 'bullet'
    }
  ];
}
213
+
214
/**
 * Flatten a list node into the text of its items.
 *
 * Only the first child of each item is read, so nested lists and any
 * further paragraphs inside an item do not appear in the result.
 *
 * @param listNode - mdast `list` node.
 * @returns One trimmed string per list item, in document order.
 */
private listToArray(listNode: any): string[] {
  const texts: string[] = [];

  for (const item of listNode.children) {
    const lead = item.children[0];
    texts.push(toString(lead).trim());
  }

  return texts;
}
224
+
225
+ /**
226
+ * Convert table to array of objects
227
+ */
76
228
  private tableToArray(tableNode: any): any[] {
77
- const headers = tableNode.children[0].children.map((cell: any) =>
229
+ const rows = tableNode.children;
230
+ if (rows.length === 0) return [];
231
+
232
+ // First row = headers
233
+ const headers = rows[0].children.map((cell: any) =>
78
234
  toCamelCase(toString(cell).trim())
79
235
  );
80
236
 
81
- return tableNode.children.slice(1).map((row: any) => {
237
+ // Remaining rows = data
238
+ return rows.slice(1).map((row: any) => {
82
239
  const obj: any = {};
83
240
  row.children.forEach((cell: any, i: number) => {
84
241
  const key = headers[i] || `column${i}`;
@@ -87,4 +244,45 @@ export class RemarkParser {
87
244
  return obj;
88
245
  });
89
246
  }
90
- }
247
+
248
/**
 * Collapse a list of sections into a plain object.
 *
 * Each heading is camel-cased and used as the key; the section content is
 * the value. When two headings map to the same key, the later section wins.
 *
 * @param sections - Sections as returned by `parse`.
 * @returns Object keyed by camel-cased heading.
 */
sectionsToObject(sections: MarkdownSection[]): Record<string, any> {
  return sections.reduce<Record<string, any>>((acc, { heading, content }) => {
    acc[toCamelCase(heading)] = content;
    return acc;
  }, {});
}
261
+
262
/**
 * Fold the pending loose nodes into `section` and append it to `sections`.
 *
 * The section passed in must not be in `sections` yet; it is pushed
 * unconditionally at the end.
 *
 * Merge rules for the content produced from `nodes` (skipped when falsy):
 * - section has no content yet      -> take the new content as is
 * - both are strings                -> join with a blank line
 * - existing content is an array    -> push the new content onto it (in place)
 * - anything else                   -> wrap both in a two-element array
 *
 * @param sections - Output list that receives the section.
 * @param section - Section being completed.
 * @param nodes - Loose nodes collected since the section started.
 * @param bulletMode - Resolved bullet mode, forwarded to `processContent`.
 */
private mergeAndPushSection(sections: MarkdownSection[], section: MarkdownSection, nodes: any[], bulletMode: BulletMode) {
  const extra = this.processContent(nodes, bulletMode);

  if (extra) {
    const existing = section.content;

    if (!existing) {
      section.content = extra;
    } else if (typeof existing === 'string' && typeof extra === 'string') {
      section.content = existing + '\n\n' + extra;
    } else if (Array.isArray(existing)) {
      existing.push(extra);
    } else {
      section.content = [existing, extra];
    }
  }

  sections.push(section);
}
288
+ }
@@ -1,86 +1,61 @@
1
- import { toString } from 'mdast-util-to-string';
2
-
3
- export function remarkBulletSections() {
4
- return (tree: any) => {
5
- const children = tree.children;
6
- if (!children) return;
7
-
8
- for (let i = 0; i < children.length; i++) {
9
- const node = children[i];
10
- if (node.type === 'list' && node.ordered === false) {
11
- const items = node.children;
12
- if (items.length === 0) continue;
13
-
14
- const newRootNodes: any[] = [];
15
- let currentListItems: any[] = [];
16
-
17
- items.forEach((item: any, idx: number) => {
18
- const firstChild = item.children[0];
19
- const text = firstChild ? toString(firstChild) : '';
20
- const lines = text.trim().split('\n');
21
- const firstLine = lines[0]?.trim() || '';
22
1
 
23
- const isShort = firstLine.length > 0 && firstLine.length < 150;
24
- const hasMoreContent = lines.length > 1 || item.children.length > 1;
25
-
26
- // Section detection:
27
- // 1. It's short and has more content.
28
- // 2. OR it's short and is followed by a non-short item? (Hard to check here)
29
- // 3. OR it's one of the "known" section keywords.
30
- const keywords = ['answer', 'assumptions', 'unknowns', 'evidence', 'protection', 'control', 'management', 'design', 'logging', 'monitoring', 'backups', 'compliance', 'governance', 'modeling', 'incident', 'vendor', 'changes'];
31
- const isSectionKeyword = keywords.some(k => firstLine.toLowerCase().includes(k));
32
-
33
- if (isShort && (hasMoreContent || isSectionKeyword)) {
34
- // Flush existing list items if any
35
- if (currentListItems.length > 0) {
36
- newRootNodes.push({
37
- type: 'list',
38
- ordered: false,
39
- children: [...currentListItems]
40
- });
41
- currentListItems = [];
42
- }
43
-
44
- // Add as heading
45
- newRootNodes.push({
46
- type: 'heading',
47
- depth: 2,
48
- children: [{ type: 'text', value: firstLine }]
49
- });
50
-
51
- // Add content
52
- if (lines.length > 1) {
53
- newRootNodes.push({
54
- type: 'paragraph',
55
- children: [{ type: 'text', value: lines.slice(1).join('\n').trim() }]
56
- });
57
- }
58
- if (item.children.length > 1) {
59
- newRootNodes.push(...item.children.slice(1));
60
- }
61
- } else {
62
- currentListItems.push(item);
63
- }
64
- });
2
+ /**
3
+ * Remark plugin to handle bullet-style sections
4
+ * Converts bullets that look like sections into heading nodes
5
+ */
6
+ import { visit } from 'unist-util-visit';
7
+ import { toString } from 'mdast-util-to-string';
65
8
 
66
- // Flush remaining
67
- if (currentListItems.length > 0) {
68
- newRootNodes.push({
69
- type: 'list',
70
- ordered: false,
71
- children: [...currentListItems]
72
- });
73
- }
9
// Lower-case words that, when found anywhere in a bullet's text, count as a
// hint that the bullet is a section title rather than a plain list item.
const SECTION_KEYWORDS = [
  'answer',
  'summary',
  'introduction',
  'conclusion',
  'overview',
  'assumptions',
  'unknowns',
  'evidence',
  'notes',
  'details',
  'description',
  'background',
  'analysis',
  'findings',
  'recommendations',
  'data',
  'identity',
  'network',
  'security',
  'monitoring',
  'governance',
  'availability',
  'backup',
  'patch',
  'operational',
  'provider'
];
74
16
 
75
- if (newRootNodes.length > 0) {
76
- // If we performed any transformation (i.e., we found at least one heading)
77
- const hasHeadings = newRootNodes.some(n => n.type === 'heading');
78
- if (hasHeadings) {
79
- children.splice(i, 1, ...newRootNodes);
80
- i += newRootNodes.length - 1;
81
- }
82
- }
83
- }
84
- }
85
- };
86
- }
17
/**
 * Remark plugin kept for API compatibility; its transformer is a no-op.
 *
 * Bullets are no longer rewritten into heading nodes here; that handling
 * happens in the parser. The previous body walked every list but never
 * recorded or changed anything, and returned the tree from the visitor,
 * which is not a valid visitor action. The transformer therefore simply
 * leaves the tree untouched.
 *
 * @returns A transformer that does not modify the tree and returns nothing.
 */
export function remarkBulletSections() {
  return (_tree: any): void => {
    // Intentionally empty: the tree is passed through unchanged.
  };
}
40
+
41
/**
 * Heuristic: does this list item look like a section title?
 *
 * Five indicators are checked against the item's first child; the item is
 * considered a section bullet when at least two of them hold:
 * contains a section keyword, contains a colon, has further children,
 * starts with an upper-case ASCII letter, is longer than 30 characters.
 *
 * @param listItem - mdast `listItem` node.
 * @returns `true` when two or more indicators match; `false` for an item
 *   without children.
 */
function isSectionBullet(listItem: any): boolean {
  if (!listItem.children || listItem.children.length === 0) {
    return false;
  }

  const rawText = toString(listItem.children[0]);
  const text = rawText.toLowerCase();

  const hasKeyword = SECTION_KEYWORDS.some(kw => text.includes(kw));
  const hasColon = text.includes(':');
  const hasContent = listItem.children.length > 1;
  // Checked on the original casing: the lower-cased text can never match
  // /^[A-Z]/, which made this indicator permanently false.
  const isCapitalized = /^[A-Z]/.test(rawText);
  const isLong = text.length > 30;

  const indicators = [hasKeyword, hasColon, hasContent, isCapitalized, isLong];
  const score = indicators.filter(Boolean).length;

  return score >= 2;
}
@@ -0,0 +1,86 @@
1
+ import { toString } from 'mdast-util-to-string';
2
+
3
/**
 * Remark plugin that promotes section-like bullets of root-level unordered
 * lists to depth-2 headings.
 *
 * A bullet is promoted when its first line is non-empty and shorter than 150
 * characters AND it either carries more content (further lines in the same
 * node or further children) or mentions one of the known section keywords.
 * Bullets that are not promoted are regrouped into unordered lists around
 * the generated headings. A list in which no bullet is promoted is left
 * exactly as it was.
 */
export function remarkBulletSections() {
  const keywords = ['answer', 'assumptions', 'unknowns', 'evidence', 'protection', 'control', 'management', 'design', 'logging', 'monitoring', 'backups', 'compliance', 'governance', 'modeling', 'incident', 'vendor', 'changes'];

  const mentionsKeyword = (line: string): boolean => {
    const lowered = line.toLowerCase();
    return keywords.some(k => lowered.includes(k));
  };

  return (tree: any) => {
    const children = tree.children;
    if (!children) return;

    for (let i = 0; i < children.length; i++) {
      const node = children[i];
      if (node.type !== 'list' || node.ordered !== false) continue;

      const items = node.children;
      if (items.length === 0) continue;

      const replacement: any[] = [];
      let pending: any[] = [];
      let sawHeading = false;

      // Emit the plain bullets gathered so far as their own unordered list.
      const flushPending = () => {
        if (pending.length === 0) return;
        replacement.push({
          type: 'list',
          ordered: false,
          children: [...pending]
        });
        pending = [];
      };

      for (const item of items) {
        const lead = item.children[0];
        const lines = (lead ? toString(lead) : '').trim().split('\n');
        const title = lines[0]?.trim() || '';

        const isShort = title.length > 0 && title.length < 150;
        const extraLines = lines.length > 1;
        const extraChildren = item.children.length > 1;
        const isSection = isShort && (extraLines || extraChildren || mentionsKeyword(title));

        if (!isSection) {
          pending.push(item);
          continue;
        }

        flushPending();
        sawHeading = true;

        replacement.push({
          type: 'heading',
          depth: 2,
          children: [{ type: 'text', value: title }]
        });

        // Remaining lines of the title node become a paragraph ...
        if (extraLines) {
          replacement.push({
            type: 'paragraph',
            children: [{ type: 'text', value: lines.slice(1).join('\n').trim() }]
          });
        }
        // ... and the item's other children are hoisted to root level as is.
        if (extraChildren) {
          replacement.push(...item.children.slice(1));
        }
      }

      flushPending();

      // Only rewrite the tree when at least one bullet became a heading, then
      // step over the nodes that were just inserted.
      if (sawHeading) {
        children.splice(i, 1, ...replacement);
        i += replacement.length - 1;
      }
    }
  };
}
@@ -0,0 +1,161 @@
1
+ /**
2
+ * Detect if bullets should be treated as array items or sections
3
+ */
4
+ import { visit } from 'unist-util-visit';
5
+ import { toString } from 'mdast-util-to-string';
6
+
7
/** Outcome of the bullet-mode heuristic. */
export interface BulletModeResult {
  /** Whether root-level bullets should be read as plain items or as sections. */
  mode:
    | 'array'
    | 'sections';
  /** Score margin between the two modes, from 0 (tie / no data) to 1. */
  confidence: number;
  /** Human-readable explanations of the signals that were scored. */
  reasons: string[];
}
12
+
13
// Lower-case words that, when found anywhere in a bullet's text, are scored
// as a hint that the bullet is a section title.
const SECTION_KEYWORDS = [
  'answer',
  'summary',
  'introduction',
  'conclusion',
  'overview',
  'assumptions',
  'unknowns',
  'evidence',
  'notes',
  'details',
  'description',
  'background',
  'analysis',
  'findings',
  'recommendations',
  'data',
  'identity',
  'network',
  'security',
  'monitoring',
  'governance',
  'availability',
  'backup',
  'patch',
  'operational',
  'provider'
];
20
+
21
/**
 * Decide whether the bullets of a document should be read as plain array
 * items or as section titles.
 *
 * Only lists whose parent is the root node are inspected. For each of their
 * items the first child's text is examined for colons and section keywords,
 * and the item is checked for extra children and nested lists. The counts
 * feed two additive scores; the mode with the strictly higher score wins and
 * a tie resolves to `'array'`.
 *
 * @param tree - mdast tree to analyse.
 * @returns The chosen mode, a confidence of `|sections - array| / (sections + array)`
 *   (0 when nothing was scored or no bullets exist), and at most the first
 *   five reasons in the order they were recorded.
 */
export function detectBulletMode(tree: any): BulletModeResult {
  const reasons: string[] = [];
  let sectionScore = 0;
  let arrayScore = 0;

  let bulletCount = 0;
  let bulletsWithContent = 0;
  let bulletsWithColons = 0;
  let bulletsWithNestedLists = 0;
  let bulletsWithKeywords = 0;
  let totalBulletLength = 0;

  // Collect per-bullet statistics from root-level lists only.
  visit(tree, 'list', (listNode: any, _index?: number, parent?: any) => {
    if (parent?.type !== 'root') return;

    for (const listItem of listNode.children) {
      bulletCount++;

      // Text of the item's first child only (the bullet's own line/paragraph).
      const firstChild = listItem.children[0];
      const itemText = firstChild ? toString(firstChild) : '';
      totalBulletLength += itemText.length;

      // 1. Colon anywhere in the text (title pattern such as "Short answer:").
      if (itemText.includes(':')) {
        bulletsWithColons++;
      }

      // 2. Anything after the first child (paragraphs, nested items).
      if (listItem.children.length > 1) {
        bulletsWithContent++;
      }

      // 3. Nested list directly inside the item.
      if (listItem.children.some((child: any) => child.type === 'list')) {
        bulletsWithNestedLists++;
      }

      // 4. Section keyword as a substring, case-insensitive.
      const lowerText = itemText.toLowerCase();
      if (SECTION_KEYWORDS.some(kw => lowerText.includes(kw))) {
        bulletsWithKeywords++;
      }
    }
  });

  if (bulletCount === 0) {
    return {
      mode: 'array',
      confidence: 0,
      reasons: ['No bullets found']
    };
  }

  const avgLength = totalBulletLength / bulletCount;

  // === SCORING FOR SECTIONS MODE ===

  if (bulletsWithColons > 0) {
    const percent = (bulletsWithColons / bulletCount * 100).toFixed(0);
    sectionScore += 3;
    reasons.push(`${bulletsWithColons}/${bulletCount} bullets have colons (${percent}%) - strong section indicator`);
  }

  if (bulletsWithContent > 0) {
    const percent = (bulletsWithContent / bulletCount * 100).toFixed(0);
    sectionScore += 3;
    reasons.push(`${bulletsWithContent}/${bulletCount} bullets have content below (${percent}%) - strong section indicator`);
  }

  if (bulletsWithNestedLists > 0) {
    sectionScore += 2;
    reasons.push(`${bulletsWithNestedLists} bullets have nested lists - section indicator`);
  }

  if (bulletsWithKeywords > 0) {
    sectionScore += 2;
    reasons.push(`${bulletsWithKeywords} bullets contain section keywords`);
  }

  if (avgLength > 30) {
    sectionScore += 1;
    reasons.push(`Long bullet text (avg ${avgLength.toFixed(0)} chars) - suggests titles`);
  }

  if (bulletCount <= 5 && (bulletsWithContent > 0 || bulletsWithColons > 0)) {
    sectionScore += 1;
    reasons.push('Few bullets with rich content suggests sections');
  }

  // === SCORING FOR ARRAY MODE ===

  if (bulletsWithColons === 0 && bulletsWithContent === 0 && bulletsWithNestedLists === 0) {
    arrayScore += 4;
    reasons.push('No bullets have content, colons, or nesting - pure list indicator');
  }

  if (avgLength < 30) {
    arrayScore += 2;
    reasons.push(`Short bullet text (avg ${avgLength.toFixed(0)} chars) - suggests list items`);
  }

  if (bulletCount >= 3 && bulletsWithContent === 0) {
    arrayScore += 2;
    reasons.push(`${bulletCount} simple bullets without content - array pattern`);
  }

  if (bulletsWithKeywords === 0) {
    arrayScore += 1;
    reasons.push('No section keywords found');
  }

  // Edge case: all bullets are very short and have no special features.
  if (avgLength < 20 && bulletsWithContent === 0 && bulletsWithColons === 0) {
    arrayScore += 2;
    reasons.push('Very short bullets with no features - definitely array');
  }

  // === DECISION ===

  const totalScore = sectionScore + arrayScore;
  // Strict comparison: a tie falls back to 'array'.
  const mode = sectionScore > arrayScore ? 'sections' : 'array';
  const confidence = totalScore > 0 ? Math.abs(sectionScore - arrayScore) / totalScore : 0;

  return {
    mode,
    confidence,
    // Reported reasons are capped at the first five recorded.
    reasons: reasons.slice(0, 5)
  };
}