specmem-hardwicksoftware 3.7.19 → 3.7.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -88,6 +88,18 @@ export class CodeAnalyzer {
88
88
  case 'rust':
89
89
  this.extractRustDefinitions(fileId, filePath, lines, language, definitions);
90
90
  break;
91
+ case 'html':
92
+ this.extractHTMLDefinitions(fileId, filePath, lines, language, definitions);
93
+ break;
94
+ case 'java':
95
+ case 'kotlin':
96
+ case 'scala':
97
+ this.extractJavaDefinitions(fileId, filePath, lines, language, definitions);
98
+ break;
99
+ case 'cpp':
100
+ case 'c':
101
+ this.extractCppDefinitions(fileId, filePath, lines, language, definitions);
102
+ break;
91
103
  default:
92
104
  // Generic extraction for unknown languages
93
105
  this.extractGenericDefinitions(fileId, filePath, lines, language, definitions);
@@ -657,6 +669,18 @@ export class CodeAnalyzer {
657
669
  case 'rust':
658
670
  this.extractRustDependencies(fileId, filePath, lines, language, dependencies);
659
671
  break;
672
+ case 'html':
673
+ this.extractHTMLDependencies(fileId, filePath, lines, language, dependencies);
674
+ break;
675
+ case 'java':
676
+ case 'kotlin':
677
+ case 'scala':
678
+ this.extractJavaDependencies(fileId, filePath, lines, language, dependencies);
679
+ break;
680
+ case 'cpp':
681
+ case 'c':
682
+ this.extractCppDependencies(fileId, filePath, lines, language, dependencies);
683
+ break;
660
684
  default:
661
685
  // Try generic patterns
662
686
  this.extractGenericDependencies(fileId, filePath, lines, language, dependencies);
@@ -980,6 +1004,9 @@ export class CodeAnalyzer {
980
1004
  * createChunks - splits code into chunks for semantic search
981
1005
  */
982
1006
  createChunks(fileId, filePath, content, language, lines) {
1007
+ if (language === 'html') {
1008
+ return this.createHTMLChunks(fileId, filePath, content, language, lines);
1009
+ }
983
1010
  const chunks = [];
984
1011
  // Don't chunk small files
985
1012
  if (lines.length <= this.chunkSize) {
@@ -1155,6 +1182,1134 @@ export class CodeAnalyzer {
1155
1182
  // ========================================
1156
1183
  // HELPER METHODS
1157
1184
  // ========================================
1185
+ // ========================================
1186
+ // JAVA/KOTLIN/SCALA ANALYSIS
1187
+ // ========================================
1188
+ extractJavaDefinitions(fileId, filePath, lines, language, definitions) {
1189
+ const patterns = {
1190
+ // Classes, abstract classes
1191
+ class: /^(?:\s*)(?:(?:public|private|protected|static|final|abstract|sealed|non-sealed)\s+)*class\s+(\w+)(?:<[^>]*>)?(?:\s+extends\s+([\w.<>]+))?(?:\s+implements\s+([^{]+))?/,
1192
+ // Interfaces
1193
+ interface: /^(?:\s*)(?:(?:public|private|protected|static)\s+)*interface\s+(\w+)(?:<[^>]*>)?(?:\s+extends\s+([^{]+))?/,
1194
+ // Enums
1195
+ enum: /^(?:\s*)(?:(?:public|private|protected|static)\s+)*enum\s+(\w+)(?:\s+implements\s+([^{]+))?/,
1196
+ // Annotations
1197
+ annotationDef: /^(?:\s*)(?:(?:public|private|protected)\s+)*@interface\s+(\w+)/,
1198
+ // Records (Java 14+)
1199
+ record: /^(?:\s*)(?:(?:public|private|protected|static|final)\s+)*record\s+(\w+)(?:<[^>]*>)?\s*\(([^)]*)\)/,
1200
+ // Methods and constructors
1201
+ method: /^(?:\s*)(?:(?:public|private|protected|static|final|abstract|synchronized|native|default|strictfp|override)\s+)*(?:(?:<[^>]*>\s+)?)([\w.<>\[\]?]+)\s+(\w+)\s*\(([^)]*)\)/,
1202
+ constructor: /^(?:\s*)(?:(?:public|private|protected)\s+)*(\w+)\s*\(([^)]*)\)\s*(?:throws\s+[\w.,\s]+)?\s*\{/,
1203
+ // Fields, constants, variables
1204
+ field: /^(?:\s*)(?:(?:public|private|protected|static|final|volatile|transient)\s+)+([\w.<>\[\]?]+)\s+(\w+)\s*(?:=\s*(.+?))?;/,
1205
+ // Static/instance initializer blocks
1206
+ staticInit: /^(?:\s*)static\s*\{/,
1207
+ // Annotations on next line (track for context)
1208
+ annotation: /^(?:\s*)@(\w+)(?:\(([^)]*)\))?/,
1209
+ // Kotlin-specific
1210
+ kotlinFun: /^(?:\s*)(?:(?:public|private|protected|internal|open|override|suspend|inline|infix|operator|tailrec)\s+)*fun\s+(?:<[^>]*>\s+)?(\w+)\s*\(([^)]*)\)(?:\s*:\s*([\w.<>?]+))?/,
1211
+ kotlinVal: /^(?:\s*)(?:(?:public|private|protected|internal|open|override|const|lateinit)\s+)*(?:val|var)\s+(\w+)\s*(?::\s*([\w.<>?]+))?\s*(?:=|$)/,
1212
+ kotlinObject: /^(?:\s*)(?:(?:public|private|protected|internal)\s+)*(?:companion\s+)?object\s+(\w+)?/,
1213
+ kotlinDataClass: /^(?:\s*)(?:(?:public|private|protected|internal|open|sealed)\s+)*data\s+class\s+(\w+)/,
1214
+ };
1215
+ const definitionStack = [];
1216
+ let braceDepth = 0;
1217
+ let lastAnnotations = [];
1218
+ for (let i = 0; i < lines.length; i++) {
1219
+ const line = lines[i];
1220
+ const trimmed = line.trim();
1221
+ const openBraces = (line.match(/\{/g) || []).length;
1222
+ const closeBraces = (line.match(/\}/g) || []).length;
1223
+ // Pop definitions from stack when scope closes
1224
+ for (let b = 0; b < closeBraces; b++) {
1225
+ const newDepth = braceDepth - (b + 1);
1226
+ while (definitionStack.length > 0 && definitionStack[definitionStack.length - 1].braceDepthAtStart >= newDepth) {
1227
+ definitionStack.pop();
1228
+ }
1229
+ }
1230
+ braceDepth += openBraces - closeBraces;
1231
+ if (trimmed.startsWith('//') || trimmed.startsWith('/*') || trimmed.startsWith('*') || trimmed === '') {
1232
+ continue;
1233
+ }
1234
+ const getCurrentParent = () => definitionStack.length > 0 ? definitionStack[definitionStack.length - 1].def : null;
1235
+ const getQualifiedName = (name) => {
1236
+ const parent = getCurrentParent();
1237
+ if (!parent) return undefined;
1238
+ return parent.qualifiedName ? `${parent.qualifiedName}.${name}` : `${parent.name}.${name}`;
1239
+ };
1240
+ const getVisibility = (line) => {
1241
+ if (line.includes('private')) return 'private';
1242
+ if (line.includes('protected')) return 'protected';
1243
+ if (line.includes('internal')) return 'internal';
1244
+ return 'public';
1245
+ };
1246
+ // Track annotations
1247
+ let match = line.match(patterns.annotation);
1248
+ if (match && !line.match(patterns.annotationDef)) {
1249
+ lastAnnotations.push(match[1]);
1250
+ continue;
1251
+ }
1252
+ const decorators = [...lastAnnotations];
1253
+ lastAnnotations = [];
1254
+ // Annotation definition (@interface)
1255
+ match = line.match(patterns.annotationDef);
1256
+ if (match) {
1257
+ const def = this.createDefinition(fileId, filePath, {
1258
+ name: match[1],
1259
+ qualifiedName: getQualifiedName(match[1]),
1260
+ definitionType: 'annotation',
1261
+ startLine: i + 1,
1262
+ language,
1263
+ visibility: getVisibility(line),
1264
+ isExported: line.includes('public'),
1265
+ parentDefinitionId: getCurrentParent()?.id,
1266
+ signature: trimmed,
1267
+ decorators
1268
+ });
1269
+ definitions.push(def);
1270
+ if (line.includes('{')) definitionStack.push({ def, braceDepthAtStart: braceDepth - openBraces });
1271
+ continue;
1272
+ }
1273
+ // Enum
1274
+ match = line.match(patterns.enum);
1275
+ if (match) {
1276
+ const parent = getCurrentParent();
1277
+ const def = this.createDefinition(fileId, filePath, {
1278
+ name: match[1],
1279
+ qualifiedName: getQualifiedName(match[1]),
1280
+ definitionType: 'enum',
1281
+ startLine: i + 1,
1282
+ language,
1283
+ visibility: getVisibility(line),
1284
+ isExported: line.includes('public'),
1285
+ parentDefinitionId: parent?.id,
1286
+ signature: trimmed,
1287
+ decorators
1288
+ });
1289
+ definitions.push(def);
1290
+ if (line.includes('{')) definitionStack.push({ def, braceDepthAtStart: braceDepth - openBraces });
1291
+ continue;
1292
+ }
1293
+ // Record (Java 14+)
1294
+ match = line.match(patterns.record);
1295
+ if (match) {
1296
+ const parent = getCurrentParent();
1297
+ const def = this.createDefinition(fileId, filePath, {
1298
+ name: match[1],
1299
+ qualifiedName: getQualifiedName(match[1]),
1300
+ definitionType: 'record',
1301
+ startLine: i + 1,
1302
+ language,
1303
+ visibility: getVisibility(line),
1304
+ isExported: line.includes('public'),
1305
+ parentDefinitionId: parent?.id,
1306
+ signature: trimmed,
1307
+ decorators
1308
+ });
1309
+ definitions.push(def);
1310
+ if (line.includes('{')) definitionStack.push({ def, braceDepthAtStart: braceDepth - openBraces });
1311
+ continue;
1312
+ }
1313
+ // Interface
1314
+ match = line.match(patterns.interface);
1315
+ if (match) {
1316
+ const parent = getCurrentParent();
1317
+ const def = this.createDefinition(fileId, filePath, {
1318
+ name: match[1],
1319
+ qualifiedName: getQualifiedName(match[1]),
1320
+ definitionType: 'interface',
1321
+ startLine: i + 1,
1322
+ language,
1323
+ visibility: getVisibility(line),
1324
+ isExported: line.includes('public'),
1325
+ parentDefinitionId: parent?.id,
1326
+ signature: trimmed,
1327
+ decorators
1328
+ });
1329
+ definitions.push(def);
1330
+ if (line.includes('{')) definitionStack.push({ def, braceDepthAtStart: braceDepth - openBraces });
1331
+ continue;
1332
+ }
1333
+ // Kotlin data class
1334
+ match = line.match(patterns.kotlinDataClass);
1335
+ if (match) {
1336
+ const parent = getCurrentParent();
1337
+ const def = this.createDefinition(fileId, filePath, {
1338
+ name: match[1],
1339
+ qualifiedName: getQualifiedName(match[1]),
1340
+ definitionType: 'class',
1341
+ startLine: i + 1,
1342
+ language,
1343
+ visibility: getVisibility(line),
1344
+ parentDefinitionId: parent?.id,
1345
+ signature: trimmed,
1346
+ decorators,
1347
+ metadata: { isDataClass: true }
1348
+ });
1349
+ definitions.push(def);
1350
+ if (line.includes('{')) definitionStack.push({ def, braceDepthAtStart: braceDepth - openBraces });
1351
+ continue;
1352
+ }
1353
+ // Class
1354
+ match = line.match(patterns.class);
1355
+ if (match) {
1356
+ const parent = getCurrentParent();
1357
+ const def = this.createDefinition(fileId, filePath, {
1358
+ name: match[1],
1359
+ qualifiedName: getQualifiedName(match[1]),
1360
+ definitionType: 'class',
1361
+ startLine: i + 1,
1362
+ language,
1363
+ visibility: getVisibility(line),
1364
+ isExported: line.includes('public'),
1365
+ isAbstract: line.includes('abstract'),
1366
+ isStatic: line.includes('static'),
1367
+ parentDefinitionId: parent?.id,
1368
+ signature: trimmed,
1369
+ decorators
1370
+ });
1371
+ definitions.push(def);
1372
+ if (line.includes('{')) definitionStack.push({ def, braceDepthAtStart: braceDepth - openBraces });
1373
+ continue;
1374
+ }
1375
+ // Kotlin object/companion
1376
+ match = line.match(patterns.kotlinObject);
1377
+ if (match) {
1378
+ const name = match[1] || 'Companion';
1379
+ const def = this.createDefinition(fileId, filePath, {
1380
+ name,
1381
+ qualifiedName: getQualifiedName(name),
1382
+ definitionType: 'class',
1383
+ startLine: i + 1,
1384
+ language,
1385
+ visibility: getVisibility(line),
1386
+ isStatic: true,
1387
+ parentDefinitionId: getCurrentParent()?.id,
1388
+ signature: trimmed,
1389
+ metadata: { isObject: true, isCompanion: line.includes('companion') }
1390
+ });
1391
+ definitions.push(def);
1392
+ if (line.includes('{')) definitionStack.push({ def, braceDepthAtStart: braceDepth - openBraces });
1393
+ continue;
1394
+ }
1395
+ // Kotlin fun
1396
+ match = line.match(patterns.kotlinFun);
1397
+ if (match) {
1398
+ const parent = getCurrentParent();
1399
+ definitions.push(this.createDefinition(fileId, filePath, {
1400
+ name: match[1],
1401
+ qualifiedName: getQualifiedName(match[1]),
1402
+ definitionType: parent?.definitionType === 'class' || parent?.definitionType === 'interface' ? 'method' : 'function',
1403
+ startLine: i + 1,
1404
+ language,
1405
+ visibility: getVisibility(line),
1406
+ isAsync: line.includes('suspend'),
1407
+ returnType: match[3],
1408
+ parentDefinitionId: parent?.id,
1409
+ signature: trimmed,
1410
+ decorators
1411
+ }));
1412
+ continue;
1413
+ }
1414
+ // Kotlin val/var
1415
+ match = line.match(patterns.kotlinVal);
1416
+ if (match) {
1417
+ const parent = getCurrentParent();
1418
+ const isConst = line.includes('const') || (line.includes('val') && braceDepth <= 1);
1419
+ definitions.push(this.createDefinition(fileId, filePath, {
1420
+ name: match[1],
1421
+ qualifiedName: getQualifiedName(match[1]),
1422
+ definitionType: isConst ? 'constant' : 'variable',
1423
+ startLine: i + 1,
1424
+ language,
1425
+ visibility: getVisibility(line),
1426
+ parentDefinitionId: parent?.id,
1427
+ signature: trimmed,
1428
+ decorators
1429
+ }));
1430
+ continue;
1431
+ }
1432
+ // Static initializer
1433
+ if (patterns.staticInit.test(line)) {
1434
+ definitions.push(this.createDefinition(fileId, filePath, {
1435
+ name: 'static_init',
1436
+ qualifiedName: getQualifiedName('static_init'),
1437
+ definitionType: 'function',
1438
+ startLine: i + 1,
1439
+ language,
1440
+ isStatic: true,
1441
+ parentDefinitionId: getCurrentParent()?.id,
1442
+ signature: trimmed
1443
+ }));
1444
+ continue;
1445
+ }
1446
+ // Constructor — must match BEFORE method since constructors look like methods
1447
+ const parent = getCurrentParent();
1448
+ if (parent && parent.definitionType === 'class') {
1449
+ match = line.match(patterns.constructor);
1450
+ if (match && match[1] === parent.name) {
1451
+ definitions.push(this.createDefinition(fileId, filePath, {
1452
+ name: match[1],
1453
+ qualifiedName: getQualifiedName(match[1]),
1454
+ definitionType: 'constructor',
1455
+ startLine: i + 1,
1456
+ language,
1457
+ visibility: getVisibility(line),
1458
+ parentDefinitionId: parent.id,
1459
+ signature: trimmed,
1460
+ decorators
1461
+ }));
1462
+ continue;
1463
+ }
1464
+ }
1465
+ // Method
1466
+ match = line.match(patterns.method);
1467
+ if (match) {
1468
+ const returnType = match[1];
1469
+ const methodName = match[2];
1470
+ // Skip control flow keywords that look like methods
1471
+ if (['if', 'for', 'while', 'switch', 'catch', 'return', 'new', 'throw'].includes(methodName)) continue;
1472
+ const parentDef = getCurrentParent();
1473
+ definitions.push(this.createDefinition(fileId, filePath, {
1474
+ name: methodName,
1475
+ qualifiedName: getQualifiedName(methodName),
1476
+ definitionType: parentDef?.definitionType === 'class' || parentDef?.definitionType === 'interface' || parentDef?.definitionType === 'enum' ? 'method' : 'function',
1477
+ startLine: i + 1,
1478
+ language,
1479
+ visibility: getVisibility(line),
1480
+ isStatic: line.includes('static'),
1481
+ isAbstract: line.includes('abstract'),
1482
+ isAsync: line.includes('synchronized'),
1483
+ returnType,
1484
+ parentDefinitionId: parentDef?.id,
1485
+ signature: trimmed,
1486
+ decorators
1487
+ }));
1488
+ continue;
1489
+ }
1490
+ // Field / constant
1491
+ match = line.match(patterns.field);
1492
+ if (match) {
1493
+ const fieldType = match[1];
1494
+ const fieldName = match[2];
1495
+ // Skip if it's a keyword
1496
+ if (['return', 'throw', 'new', 'if', 'for', 'while'].includes(fieldName)) continue;
1497
+ const isFinal = line.includes('final');
1498
+ const isStatic = line.includes('static');
1499
+ const parentDef = getCurrentParent();
1500
+ definitions.push(this.createDefinition(fileId, filePath, {
1501
+ name: fieldName,
1502
+ qualifiedName: getQualifiedName(fieldName),
1503
+ definitionType: (isFinal && isStatic) ? 'constant' : 'variable',
1504
+ startLine: i + 1,
1505
+ language,
1506
+ visibility: getVisibility(line),
1507
+ isStatic,
1508
+ parentDefinitionId: parentDef?.id,
1509
+ signature: trimmed,
1510
+ decorators,
1511
+ metadata: { type: fieldType, isFinal }
1512
+ }));
1513
+ }
1514
+ }
1515
+ }
1516
+ extractJavaDependencies(fileId, filePath, lines, language, dependencies) {
1517
+ const patterns = {
1518
+ // import com.foo.Bar;
1519
+ import: /^import\s+(?:static\s+)?([\w.]+(?:\.\*)?);/,
1520
+ // package com.foo.bar;
1521
+ package: /^package\s+([\w.]+);/
1522
+ };
1523
+ for (let i = 0; i < lines.length; i++) {
1524
+ const line = lines[i];
1525
+ const trimmed = line.trim();
1526
+ // Package declaration
1527
+ let match = trimmed.match(patterns.package);
1528
+ if (match) {
1529
+ dependencies.push(this.createDependency(fileId, filePath, {
1530
+ targetPath: match[1],
1531
+ importType: 'package',
1532
+ importStatement: trimmed,
1533
+ lineNumber: i + 1,
1534
+ language,
1535
+ isExternal: false
1536
+ }));
1537
+ continue;
1538
+ }
1539
+ // Import
1540
+ match = trimmed.match(patterns.import);
1541
+ if (match) {
1542
+ const target = match[1];
1543
+ const isStatic = trimmed.includes('static');
1544
+ const isWildcard = target.endsWith('.*');
1545
+ const parts = target.split('.');
1546
+ const importedName = isWildcard ? '*' : parts[parts.length - 1];
1547
+ const packagePath = isWildcard ? target.slice(0, -2) : parts.slice(0, -1).join('.');
1548
+ dependencies.push(this.createDependency(fileId, filePath, {
1549
+ targetPath: target,
1550
+ importType: isStatic ? 'import_static' : 'import',
1551
+ importStatement: trimmed,
1552
+ importedNames: [importedName],
1553
+ isNamespaceImport: isWildcard,
1554
+ lineNumber: i + 1,
1555
+ language,
1556
+ isExternal: !target.startsWith('java.') && !target.startsWith('javax.'),
1557
+ isBuiltin: target.startsWith('java.') || target.startsWith('javax.'),
1558
+ packageName: packagePath
1559
+ }));
1560
+ }
1561
+ }
1562
+ }
1563
+ // ========================================
1564
+ // C/C++ ANALYSIS
1565
+ // ========================================
1566
+ extractCppDefinitions(fileId, filePath, lines, language, definitions) {
1567
+ const patterns = {
1568
+ // Functions: return_type name(params)
1569
+ function: /^(?:(?:static|inline|virtual|explicit|constexpr|consteval|extern|friend|template\s*<[^>]*>\s*)\s+)*(?:const\s+)?(?:unsigned\s+|signed\s+|long\s+|short\s+)*([\w:*&<>[\]]+(?:\s*[*&]+)?)\s+(\w+)\s*\(([^)]*)\)\s*(?:const|override|noexcept|final|=\s*0|=\s*default|=\s*delete)?\s*[{;]/,
1570
+ // Methods with class:: prefix
1571
+ methodImpl: /^(?:(?:const\s+)?(?:unsigned\s+|signed\s+)*)?([\w:*&<>[\]]+(?:\s*[*&]+)?)\s+(\w+)::(\w+)\s*\(([^)]*)\)/,
1572
+ // Class/struct
1573
+ class: /^(?:(?:template\s*<[^>]*>\s*)?)?(?:class|struct)\s+(?:__attribute__\s*\([^)]*\)\s+)?(\w+)(?:\s*:\s*(?:public|private|protected)\s+([\w:<>]+(?:\s*,\s*(?:public|private|protected)\s+[\w:<>]+)*))?/,
1574
+ // Enum
1575
+ enum: /^(?:typedef\s+)?enum\s+(?:class\s+)?(\w+)?(?:\s*:\s*\w+)?\s*\{/,
1576
+ // Typedef
1577
+ typedef: /^typedef\s+(?:(?:struct|union|enum)\s+(?:\w+\s+)?)?([\w\s*&<>]+)\s+(\w+)\s*;/,
1578
+ // Using (C++ type alias)
1579
+ using: /^using\s+(\w+)\s*=\s*(.+);/,
1580
+ // Namespace
1581
+ namespace: /^namespace\s+(\w+)/,
1582
+ // Union
1583
+ union: /^(?:typedef\s+)?union\s+(\w+)/,
1584
+ // Macro definitions
1585
+ define: /^#define\s+(\w+)(?:\(([^)]*)\))?\s*(.*)/,
1586
+ // Global/static variables and constants
1587
+ globalVar: /^(?:(?:static|extern|const|constexpr|volatile|thread_local|inline)\s+)+(?:const\s+)?(?:unsigned\s+|signed\s+|long\s+|short\s+)*([\w:*&<>[\]]+(?:\s*[*&]+)?)\s+(\w+)\s*(?:=\s*(.+?))?;/,
1588
+ // Constructor/destructor in class body
1589
+ ctorDtor: /^\s+(?:(?:explicit|virtual|inline)\s+)*~?(\w+)\s*\(([^)]*)\)\s*(?::\s*[^{]*)?[{;]/,
1590
+ // Operator overload
1591
+ operator: /(?:[\w*&<>]+)\s+operator\s*([^\s(]+)\s*\(([^)]*)\)/,
1592
+ };
1593
+ const definitionStack = [];
1594
+ let braceDepth = 0;
1595
+ let inMultiLineComment = false;
1596
+ for (let i = 0; i < lines.length; i++) {
1597
+ const line = lines[i];
1598
+ const trimmed = line.trim();
1599
+ // Handle multi-line comments
1600
+ if (inMultiLineComment) {
1601
+ if (trimmed.includes('*/')) inMultiLineComment = false;
1602
+ continue;
1603
+ }
1604
+ if (trimmed.startsWith('/*')) {
1605
+ if (!trimmed.includes('*/')) inMultiLineComment = true;
1606
+ continue;
1607
+ }
1608
+ if (trimmed.startsWith('//') || trimmed === '') continue;
1609
+ const openBraces = (line.match(/\{/g) || []).length;
1610
+ const closeBraces = (line.match(/\}/g) || []).length;
1611
+ for (let b = 0; b < closeBraces; b++) {
1612
+ const newDepth = braceDepth - (b + 1);
1613
+ while (definitionStack.length > 0 && definitionStack[definitionStack.length - 1].braceDepthAtStart >= newDepth) {
1614
+ definitionStack.pop();
1615
+ }
1616
+ }
1617
+ braceDepth += openBraces - closeBraces;
1618
+ const getCurrentParent = () => definitionStack.length > 0 ? definitionStack[definitionStack.length - 1].def : null;
1619
+ const getQualifiedName = (name) => {
1620
+ const parent = getCurrentParent();
1621
+ if (!parent) return undefined;
1622
+ return parent.qualifiedName ? `${parent.qualifiedName}::${name}` : `${parent.name}::${name}`;
1623
+ };
1624
+ const getVisibility = (line) => {
1625
+ if (/^\s*private\s*:/.test(line)) return 'private';
1626
+ if (/^\s*protected\s*:/.test(line)) return 'protected';
1627
+ if (/^\s*public\s*:/.test(line)) return 'public';
1628
+ return 'public';
1629
+ };
1630
+ // Macro (#define)
1631
+ let match = line.match(patterns.define);
1632
+ if (match) {
1633
+ definitions.push(this.createDefinition(fileId, filePath, {
1634
+ name: match[1],
1635
+ definitionType: match[2] !== undefined ? 'macro' : 'constant',
1636
+ startLine: i + 1,
1637
+ language,
1638
+ signature: trimmed,
1639
+ metadata: match[2] !== undefined ? { params: match[2] } : {}
1640
+ }));
1641
+ continue;
1642
+ }
1643
+ // Namespace
1644
+ match = line.match(patterns.namespace);
1645
+ if (match) {
1646
+ const def = this.createDefinition(fileId, filePath, {
1647
+ name: match[1],
1648
+ definitionType: 'namespace',
1649
+ startLine: i + 1,
1650
+ language,
1651
+ signature: trimmed
1652
+ });
1653
+ definitions.push(def);
1654
+ if (line.includes('{')) definitionStack.push({ def, braceDepthAtStart: braceDepth - openBraces });
1655
+ continue;
1656
+ }
1657
+ // Enum
1658
+ match = line.match(patterns.enum);
1659
+ if (match) {
1660
+ const name = match[1] || `anon_enum_L${i + 1}`;
1661
+ const def = this.createDefinition(fileId, filePath, {
1662
+ name,
1663
+ qualifiedName: getQualifiedName(name),
1664
+ definitionType: 'enum',
1665
+ startLine: i + 1,
1666
+ language,
1667
+ parentDefinitionId: getCurrentParent()?.id,
1668
+ signature: trimmed
1669
+ });
1670
+ definitions.push(def);
1671
+ if (line.includes('{')) definitionStack.push({ def, braceDepthAtStart: braceDepth - openBraces });
1672
+ continue;
1673
+ }
1674
+ // Union
1675
+ match = line.match(patterns.union);
1676
+ if (match) {
1677
+ const def = this.createDefinition(fileId, filePath, {
1678
+ name: match[1],
1679
+ qualifiedName: getQualifiedName(match[1]),
1680
+ definitionType: 'struct',
1681
+ startLine: i + 1,
1682
+ language,
1683
+ parentDefinitionId: getCurrentParent()?.id,
1684
+ signature: trimmed,
1685
+ metadata: { isUnion: true }
1686
+ });
1687
+ definitions.push(def);
1688
+ if (line.includes('{')) definitionStack.push({ def, braceDepthAtStart: braceDepth - openBraces });
1689
+ continue;
1690
+ }
1691
+ // Class/struct
1692
+ match = line.match(patterns.class);
1693
+ if (match) {
1694
+ const parent = getCurrentParent();
1695
+ const def = this.createDefinition(fileId, filePath, {
1696
+ name: match[1],
1697
+ qualifiedName: getQualifiedName(match[1]),
1698
+ definitionType: trimmed.startsWith('struct') || trimmed.match(/^template.*struct/) ? 'struct' : 'class',
1699
+ startLine: i + 1,
1700
+ language,
1701
+ isAbstract: false,
1702
+ parentDefinitionId: parent?.id,
1703
+ signature: trimmed
1704
+ });
1705
+ definitions.push(def);
1706
+ if (line.includes('{')) definitionStack.push({ def, braceDepthAtStart: braceDepth - openBraces });
1707
+ continue;
1708
+ }
1709
+ // Typedef
1710
+ match = line.match(patterns.typedef);
1711
+ if (match) {
1712
+ definitions.push(this.createDefinition(fileId, filePath, {
1713
+ name: match[2],
1714
+ definitionType: 'type',
1715
+ startLine: i + 1,
1716
+ language,
1717
+ signature: trimmed,
1718
+ metadata: { aliasOf: match[1].trim() }
1719
+ }));
1720
+ continue;
1721
+ }
1722
+ // Using alias
1723
+ match = line.match(patterns.using);
1724
+ if (match) {
1725
+ definitions.push(this.createDefinition(fileId, filePath, {
1726
+ name: match[1],
1727
+ qualifiedName: getQualifiedName(match[1]),
1728
+ definitionType: 'type',
1729
+ startLine: i + 1,
1730
+ language,
1731
+ parentDefinitionId: getCurrentParent()?.id,
1732
+ signature: trimmed,
1733
+ metadata: { aliasOf: match[2].trim() }
1734
+ }));
1735
+ continue;
1736
+ }
1737
+ // Operator overload
1738
+ match = line.match(patterns.operator);
1739
+ if (match) {
1740
+ const parent = getCurrentParent();
1741
+ definitions.push(this.createDefinition(fileId, filePath, {
1742
+ name: `operator${match[1]}`,
1743
+ qualifiedName: getQualifiedName(`operator${match[1]}`),
1744
+ definitionType: 'method',
1745
+ startLine: i + 1,
1746
+ language,
1747
+ parentDefinitionId: parent?.id,
1748
+ signature: trimmed
1749
+ }));
1750
+ continue;
1751
+ }
1752
+ // Method implementation (ClassName::method)
1753
+ match = line.match(patterns.methodImpl);
1754
+ if (match) {
1755
+ definitions.push(this.createDefinition(fileId, filePath, {
1756
+ name: match[3],
1757
+ qualifiedName: `${match[2]}::${match[3]}`,
1758
+ definitionType: match[3] === match[2] ? 'constructor' : (match[3] === `~${match[2]}` ? 'destructor' : 'method'),
1759
+ startLine: i + 1,
1760
+ language,
1761
+ returnType: match[1]?.trim(),
1762
+ signature: trimmed
1763
+ }));
1764
+ continue;
1765
+ }
1766
+ // Constructor/destructor inside class body
1767
+ const parentDef = getCurrentParent();
1768
+ if (parentDef && (parentDef.definitionType === 'class' || parentDef.definitionType === 'struct')) {
1769
+ match = line.match(patterns.ctorDtor);
1770
+ if (match) {
1771
+ const ctorName = match[1];
1772
+ if (ctorName === parentDef.name || ctorName === `~${parentDef.name}` || `~${ctorName}` === trimmed.match(/~(\w+)/)?.[0]) {
1773
+ const isDtor = trimmed.includes('~');
1774
+ definitions.push(this.createDefinition(fileId, filePath, {
1775
+ name: isDtor ? `~${parentDef.name}` : parentDef.name,
1776
+ qualifiedName: getQualifiedName(isDtor ? `~${parentDef.name}` : parentDef.name),
1777
+ definitionType: isDtor ? 'destructor' : 'constructor',
1778
+ startLine: i + 1,
1779
+ language,
1780
+ parentDefinitionId: parentDef.id,
1781
+ signature: trimmed
1782
+ }));
1783
+ continue;
1784
+ }
1785
+ }
1786
+ }
1787
+ // Function (top-level or in namespace) / method (in class)
1788
+ match = line.match(patterns.function);
1789
+ if (match) {
1790
+ const returnType = match[1];
1791
+ const funcName = match[2];
1792
+ if (['if', 'for', 'while', 'switch', 'catch', 'return', 'throw', 'sizeof', 'alignof', 'decltype', 'static_cast', 'dynamic_cast', 'const_cast', 'reinterpret_cast'].includes(funcName)) continue;
1793
+ const parentDef2 = getCurrentParent();
1794
+ const isMethod = parentDef2?.definitionType === 'class' || parentDef2?.definitionType === 'struct';
1795
+ definitions.push(this.createDefinition(fileId, filePath, {
1796
+ name: funcName,
1797
+ qualifiedName: getQualifiedName(funcName),
1798
+ definitionType: isMethod ? 'method' : 'function',
1799
+ startLine: i + 1,
1800
+ language,
1801
+ isStatic: line.includes('static'),
1802
+ isExported: !line.includes('static') && braceDepth === 0,
1803
+ returnType: returnType?.trim(),
1804
+ parentDefinitionId: parentDef2?.id,
1805
+ signature: trimmed,
1806
+ metadata: {
1807
+ isVirtual: line.includes('virtual'),
1808
+ isConstexpr: line.includes('constexpr'),
1809
+ isInline: line.includes('inline'),
1810
+ isConst: /\)\s*const/.test(line)
1811
+ }
1812
+ }));
1813
+ continue;
1814
+ }
1815
+ // Global/static variables and constants
1816
+ if (braceDepth <= 1) {
1817
+ match = line.match(patterns.globalVar);
1818
+ if (match) {
1819
+ const varName = match[2];
1820
+ if (['if', 'for', 'while', 'switch', 'return'].includes(varName)) continue;
1821
+ const isConst = line.includes('const') || line.includes('constexpr');
1822
+ definitions.push(this.createDefinition(fileId, filePath, {
1823
+ name: varName,
1824
+ qualifiedName: getQualifiedName(varName),
1825
+ definitionType: isConst ? 'constant' : 'variable',
1826
+ startLine: i + 1,
1827
+ language,
1828
+ isStatic: line.includes('static'),
1829
+ isExported: line.includes('extern'),
1830
+ parentDefinitionId: getCurrentParent()?.id,
1831
+ signature: trimmed,
1832
+ metadata: { type: match[1].trim() }
1833
+ }));
1834
+ }
1835
+ }
1836
+ }
1837
+ }
1838
+ extractCppDependencies(fileId, filePath, lines, language, dependencies) {
1839
+ const patterns = {
1840
+ // #include <header> or #include "header"
1841
+ includeAngle: /^#include\s+<([^>]+)>/,
1842
+ includeQuote: /^#include\s+"([^"]+)"/,
1843
+ // using namespace foo;
1844
+ usingNamespace: /^using\s+namespace\s+([\w:]+);/,
1845
+ // using foo::bar;
1846
+ usingDecl: /^using\s+([\w:]+);/
1847
+ };
1848
+ for (let i = 0; i < lines.length; i++) {
1849
+ const line = lines[i];
1850
+ const trimmed = line.trim();
1851
+ // Angle bracket include (system/external)
1852
+ let match = trimmed.match(patterns.includeAngle);
1853
+ if (match) {
1854
+ dependencies.push(this.createDependency(fileId, filePath, {
1855
+ targetPath: match[1],
1856
+ importType: 'include',
1857
+ importStatement: trimmed,
1858
+ lineNumber: i + 1,
1859
+ language,
1860
+ isExternal: true,
1861
+ isBuiltin: ['stdio.h', 'stdlib.h', 'string.h', 'math.h', 'iostream', 'vector', 'string', 'map', 'set', 'unordered_map', 'unordered_set', 'algorithm', 'memory', 'functional', 'utility', 'cassert', 'cstdio', 'cstdlib', 'cstring', 'cmath', 'thread', 'mutex', 'atomic', 'chrono', 'filesystem', 'fstream', 'sstream', 'iomanip', 'numeric', 'array', 'deque', 'list', 'queue', 'stack', 'tuple', 'variant', 'optional', 'any', 'type_traits', 'concepts', 'ranges', 'span', 'format'].includes(match[1])
1862
+ }));
1863
+ continue;
1864
+ }
1865
+ // Quote include (local/relative)
1866
+ match = trimmed.match(patterns.includeQuote);
1867
+ if (match) {
1868
+ dependencies.push(this.createDependency(fileId, filePath, {
1869
+ targetPath: match[1],
1870
+ importType: 'include',
1871
+ importStatement: trimmed,
1872
+ lineNumber: i + 1,
1873
+ language,
1874
+ isRelative: true,
1875
+ isExternal: false
1876
+ }));
1877
+ continue;
1878
+ }
1879
+ // using namespace
1880
+ match = trimmed.match(patterns.usingNamespace);
1881
+ if (match) {
1882
+ dependencies.push(this.createDependency(fileId, filePath, {
1883
+ targetPath: match[1],
1884
+ importType: 'using_namespace',
1885
+ importStatement: trimmed,
1886
+ isNamespaceImport: true,
1887
+ lineNumber: i + 1,
1888
+ language,
1889
+ isBuiltin: match[1] === 'std' || match[1].startsWith('std::')
1890
+ }));
1891
+ continue;
1892
+ }
1893
+ // using declaration
1894
+ match = trimmed.match(patterns.usingDecl);
1895
+ if (match) {
1896
+ const parts = match[1].split('::');
1897
+ dependencies.push(this.createDependency(fileId, filePath, {
1898
+ targetPath: match[1],
1899
+ importType: 'using',
1900
+ importStatement: trimmed,
1901
+ importedNames: [parts[parts.length - 1]],
1902
+ lineNumber: i + 1,
1903
+ language,
1904
+ isBuiltin: match[1].startsWith('std::')
1905
+ }));
1906
+ }
1907
+ }
1908
+ }
1909
+ // ========================================
1910
+ // HTML ANALYSIS
1911
+ // ========================================
1912
+ extractHTMLDefinitions(fileId, filePath, lines, language, definitions) {
1913
+ const patterns = {
1914
+ idAttr: /\bid=["']([^"']+)["']/i,
1915
+ classAttr: /\bclass=["']([^"']+)["']/i,
1916
+ scriptOpen: /<script\b[^>]*>/i,
1917
+ scriptClose: /<\/script>/i,
1918
+ styleOpen: /<style\b[^>]*>/i,
1919
+ styleClose: /<\/style>/i,
1920
+ formTag: /<form\b[^>]*(?:id|name)=["']([^"']+)["'][^>]*>/i,
1921
+ templateTag: /<template\b[^>]*(?:id)=["']([^"']+)["'][^>]*>/i,
1922
+ sectionTags: /<(header|footer|nav|main|section|article|aside)\b[^>]*(?:id=["']([^"']+)["'])?[^>]*>/i,
1923
+ dataAttr: /\bdata-([\w-]+)=["']([^"']+)["']/i,
1924
+ componentTags: /<(slot|component)\b[^>]*(?:(?:name|id)=["']([^"']+)["'])?[^>]*>/i,
1925
+ };
1926
+ let inScript = false;
1927
+ let scriptStart = -1;
1928
+ let inStyle = false;
1929
+ let styleStart = -1;
1930
+ for (let i = 0; i < lines.length; i++) {
1931
+ const line = lines[i];
1932
+ // Track script blocks
1933
+ if (!inScript && patterns.scriptOpen.test(line) && !line.includes(' src=')) {
1934
+ inScript = true;
1935
+ scriptStart = i;
1936
+ }
1937
+ if (inScript && patterns.scriptClose.test(line)) {
1938
+ definitions.push(this.createDefinition(fileId, filePath, {
1939
+ name: `script_block_L${scriptStart + 1}`,
1940
+ definitionType: 'script',
1941
+ startLine: scriptStart + 1,
1942
+ endLine: i + 1,
1943
+ language,
1944
+ signature: lines[scriptStart].trim()
1945
+ }));
1946
+ inScript = false;
1947
+ }
1948
+ // Track style blocks
1949
+ if (!inStyle && patterns.styleOpen.test(line)) {
1950
+ inStyle = true;
1951
+ styleStart = i;
1952
+ }
1953
+ if (inStyle && patterns.styleClose.test(line)) {
1954
+ definitions.push(this.createDefinition(fileId, filePath, {
1955
+ name: `style_block_L${styleStart + 1}`,
1956
+ definitionType: 'style',
1957
+ startLine: styleStart + 1,
1958
+ endLine: i + 1,
1959
+ language,
1960
+ signature: lines[styleStart].trim()
1961
+ }));
1962
+ inStyle = false;
1963
+ }
1964
+ // ID attributes
1965
+ const idMatch = line.match(patterns.idAttr);
1966
+ if (idMatch) {
1967
+ definitions.push(this.createDefinition(fileId, filePath, {
1968
+ name: idMatch[1],
1969
+ definitionType: 'element',
1970
+ startLine: i + 1,
1971
+ language,
1972
+ signature: line.trim()
1973
+ }));
1974
+ }
1975
+ // Class attributes
1976
+ const classMatch = line.match(patterns.classAttr);
1977
+ if (classMatch) {
1978
+ const classes = classMatch[1].split(/\s+/).filter(Boolean);
1979
+ for (const cls of classes) {
1980
+ definitions.push(this.createDefinition(fileId, filePath, {
1981
+ name: cls,
1982
+ definitionType: 'class',
1983
+ startLine: i + 1,
1984
+ language,
1985
+ signature: line.trim()
1986
+ }));
1987
+ }
1988
+ }
1989
+ // Form tags
1990
+ const formMatch = line.match(patterns.formTag);
1991
+ if (formMatch) {
1992
+ definitions.push(this.createDefinition(fileId, filePath, {
1993
+ name: formMatch[1],
1994
+ definitionType: 'form',
1995
+ startLine: i + 1,
1996
+ language,
1997
+ signature: line.trim()
1998
+ }));
1999
+ }
2000
+ // Template tags
2001
+ const templateMatch = line.match(patterns.templateTag);
2002
+ if (templateMatch) {
2003
+ definitions.push(this.createDefinition(fileId, filePath, {
2004
+ name: templateMatch[1],
2005
+ definitionType: 'component',
2006
+ startLine: i + 1,
2007
+ language,
2008
+ signature: line.trim()
2009
+ }));
2010
+ }
2011
+ // Section tags with IDs
2012
+ const sectionMatch = line.match(patterns.sectionTags);
2013
+ if (sectionMatch && sectionMatch[2]) {
2014
+ definitions.push(this.createDefinition(fileId, filePath, {
2015
+ name: sectionMatch[2],
2016
+ qualifiedName: `${sectionMatch[1]}#${sectionMatch[2]}`,
2017
+ definitionType: 'element',
2018
+ startLine: i + 1,
2019
+ language,
2020
+ signature: line.trim()
2021
+ }));
2022
+ }
2023
+ // Component/slot tags
2024
+ const componentMatch = line.match(patterns.componentTags);
2025
+ if (componentMatch) {
2026
+ definitions.push(this.createDefinition(fileId, filePath, {
2027
+ name: componentMatch[2] || `${componentMatch[1]}_L${i + 1}`,
2028
+ definitionType: 'component',
2029
+ startLine: i + 1,
2030
+ language,
2031
+ signature: line.trim()
2032
+ }));
2033
+ }
2034
+ // Data attributes on key elements
2035
+ const dataMatch = line.match(patterns.dataAttr);
2036
+ if (dataMatch) {
2037
+ definitions.push(this.createDefinition(fileId, filePath, {
2038
+ name: `data-${dataMatch[1]}`,
2039
+ definitionType: 'attribute',
2040
+ startLine: i + 1,
2041
+ language,
2042
+ signature: line.trim(),
2043
+ metadata: { value: dataMatch[2] }
2044
+ }));
2045
+ }
2046
+ }
2047
+ }
2048
+ extractHTMLDependencies(fileId, filePath, lines, language, dependencies) {
2049
+ const patterns = {
2050
+ scriptSrc: /<script\b[^>]*\bsrc=["']([^"']+)["'][^>]*>/i,
2051
+ linkStylesheet: /<link\b[^>]*\brel=["']stylesheet["'][^>]*\bhref=["']([^"']+)["'][^>]*>/i,
2052
+ linkImport: /<link\b[^>]*\brel=["'](?:import|modulepreload)["'][^>]*\bhref=["']([^"']+)["'][^>]*>/i,
2053
+ imgSrc: /<img\b[^>]*\bsrc=["']([^"']+)["'][^>]*>/i,
2054
+ sourceSrc: /<source\b[^>]*\bsrc=["']([^"']+)["'][^>]*>/i,
2055
+ iframeSrc: /<iframe\b[^>]*\bsrc=["']([^"']+)["'][^>]*>/i,
2056
+ linkHref: /<link\b[^>]*\bhref=["']([^"']+)["'][^>]*>/i,
2057
+ };
2058
+ let inModuleScript = false;
2059
+ for (let i = 0; i < lines.length; i++) {
2060
+ const line = lines[i];
2061
+ // Script src
2062
+ const scriptMatch = line.match(patterns.scriptSrc);
2063
+ if (scriptMatch) {
2064
+ const src = scriptMatch[1];
2065
+ dependencies.push(this.createDependency(fileId, filePath, {
2066
+ targetPath: src,
2067
+ importType: 'script_src',
2068
+ importStatement: line.trim(),
2069
+ lineNumber: i + 1,
2070
+ isExternal: src.startsWith('http') || src.startsWith('//'),
2071
+ isRelative: src.startsWith('.') || (!src.startsWith('/') && !src.startsWith('http') && !src.startsWith('//')),
2072
+ language
2073
+ }));
2074
+ }
2075
+ // Stylesheets
2076
+ const styleMatch = line.match(patterns.linkStylesheet);
2077
+ if (styleMatch) {
2078
+ const href = styleMatch[1];
2079
+ dependencies.push(this.createDependency(fileId, filePath, {
2080
+ targetPath: href,
2081
+ importType: 'stylesheet',
2082
+ importStatement: line.trim(),
2083
+ lineNumber: i + 1,
2084
+ isExternal: href.startsWith('http') || href.startsWith('//'),
2085
+ isRelative: href.startsWith('.') || (!href.startsWith('/') && !href.startsWith('http') && !href.startsWith('//')),
2086
+ language
2087
+ }));
2088
+ }
2089
+ // Link imports/modulepreload
2090
+ const importMatch = line.match(patterns.linkImport);
2091
+ if (importMatch) {
2092
+ const href = importMatch[1];
2093
+ dependencies.push(this.createDependency(fileId, filePath, {
2094
+ targetPath: href,
2095
+ importType: 'import',
2096
+ importStatement: line.trim(),
2097
+ lineNumber: i + 1,
2098
+ isExternal: href.startsWith('http') || href.startsWith('//'),
2099
+ isRelative: href.startsWith('.'),
2100
+ language
2101
+ }));
2102
+ }
2103
+ // Images
2104
+ const imgMatch = line.match(patterns.imgSrc);
2105
+ if (imgMatch) {
2106
+ const src = imgMatch[1];
2107
+ if (!src.startsWith('data:')) {
2108
+ dependencies.push(this.createDependency(fileId, filePath, {
2109
+ targetPath: src,
2110
+ importType: 'asset',
2111
+ importStatement: line.trim(),
2112
+ lineNumber: i + 1,
2113
+ isExternal: src.startsWith('http') || src.startsWith('//'),
2114
+ isRelative: src.startsWith('.') || (!src.startsWith('/') && !src.startsWith('http') && !src.startsWith('//')),
2115
+ language
2116
+ }));
2117
+ }
2118
+ }
2119
+ // Source elements
2120
+ const sourceMatch = line.match(patterns.sourceSrc);
2121
+ if (sourceMatch) {
2122
+ const src = sourceMatch[1];
2123
+ dependencies.push(this.createDependency(fileId, filePath, {
2124
+ targetPath: src,
2125
+ importType: 'asset',
2126
+ importStatement: line.trim(),
2127
+ lineNumber: i + 1,
2128
+ isExternal: src.startsWith('http') || src.startsWith('//'),
2129
+ language
2130
+ }));
2131
+ }
2132
+ // Iframes
2133
+ const iframeMatch = line.match(patterns.iframeSrc);
2134
+ if (iframeMatch) {
2135
+ const src = iframeMatch[1];
2136
+ dependencies.push(this.createDependency(fileId, filePath, {
2137
+ targetPath: src,
2138
+ importType: 'iframe',
2139
+ importStatement: line.trim(),
2140
+ lineNumber: i + 1,
2141
+ isExternal: src.startsWith('http') || src.startsWith('//'),
2142
+ language
2143
+ }));
2144
+ }
2145
+ // Track module script blocks for inline imports
2146
+ if (/<script\b[^>]*type=["']module["'][^>]*>/i.test(line) && !line.includes(' src=')) {
2147
+ inModuleScript = true;
2148
+ }
2149
+ if (inModuleScript && /<\/script>/i.test(line)) {
2150
+ inModuleScript = false;
2151
+ }
2152
+ // Inline ES imports inside module scripts
2153
+ if (inModuleScript) {
2154
+ const esImport = line.match(/^\s*import\s+(?:(?:\{[^}]*\}|\*\s+as\s+\w+|\w+)\s+from\s+)?['"]([^'"]+)['"]/);
2155
+ if (esImport) {
2156
+ const src = esImport[1];
2157
+ dependencies.push(this.createDependency(fileId, filePath, {
2158
+ targetPath: src,
2159
+ importType: 'import',
2160
+ importStatement: line.trim(),
2161
+ lineNumber: i + 1,
2162
+ isExternal: src.startsWith('http') || src.startsWith('//'),
2163
+ isRelative: src.startsWith('.'),
2164
+ language
2165
+ }));
2166
+ }
2167
+ }
2168
+ }
2169
+ }
2170
+ createHTMLChunks(fileId, filePath, content, language, lines) {
2171
+ const chunks = [];
2172
+ const structuralTags = new Set(['head', 'body', 'section', 'div', 'article', 'nav', 'header', 'footer', 'main', 'aside', 'form', 'table']);
2173
+ const openTag = /<(head|body|section|div|article|nav|header|footer|main|aside|form|table)\b[^>]*>/i;
2174
+ const scriptOpen = /<script\b[^>]*>/i;
2175
+ const scriptClose = /<\/script>/i;
2176
+ const styleOpen = /<style\b[^>]*>/i;
2177
+ const styleClose = /<\/style>/i;
2178
+ let chunkIndex = 0;
2179
+ // First pass: extract script and style blocks
2180
+ let inScript = false, scriptStart = -1;
2181
+ let inStyle = false, styleStart = -1;
2182
+ const specialBlocks = []; // {start, end, type}
2183
+ for (let i = 0; i < lines.length; i++) {
2184
+ if (!inScript && scriptOpen.test(lines[i])) {
2185
+ inScript = true;
2186
+ scriptStart = i;
2187
+ }
2188
+ if (inScript && scriptClose.test(lines[i])) {
2189
+ specialBlocks.push({ start: scriptStart, end: i, type: 'code' });
2190
+ inScript = false;
2191
+ }
2192
+ if (!inStyle && styleOpen.test(lines[i])) {
2193
+ inStyle = true;
2194
+ styleStart = i;
2195
+ }
2196
+ if (inStyle && styleClose.test(lines[i])) {
2197
+ specialBlocks.push({ start: styleStart, end: i, type: 'style' });
2198
+ inStyle = false;
2199
+ }
2200
+ }
2201
+ // Second pass: structural blocks at depth 1
2202
+ let tagDepth = 0;
2203
+ let blockStart = -1;
2204
+ let blockTag = null;
2205
+ const structuralBlocks = [];
2206
+ const isInSpecial = (lineNum) => specialBlocks.some(b => lineNum >= b.start && lineNum <= b.end);
2207
+ for (let i = 0; i < lines.length; i++) {
2208
+ if (isInSpecial(i)) continue;
2209
+ const line = lines[i];
2210
+ const openMatch = line.match(openTag);
2211
+ if (openMatch && tagDepth === 0) {
2212
+ blockStart = i;
2213
+ blockTag = openMatch[1].toLowerCase();
2214
+ tagDepth = 1;
2215
+ // Check for self-closing or single-line
2216
+ const closeRegex = new RegExp(`</${blockTag}>`, 'i');
2217
+ if (closeRegex.test(line)) {
2218
+ structuralBlocks.push({ start: i, end: i, tag: blockTag });
2219
+ tagDepth = 0;
2220
+ blockStart = -1;
2221
+ }
2222
+ continue;
2223
+ }
2224
+ if (tagDepth > 0 && blockTag) {
2225
+ const innerOpen = new RegExp(`<${blockTag}\\b`, 'gi');
2226
+ const innerClose = new RegExp(`</${blockTag}>`, 'gi');
2227
+ const opens = (line.match(innerOpen) || []).length;
2228
+ const closes = (line.match(innerClose) || []).length;
2229
+ tagDepth += opens - closes;
2230
+ if (tagDepth <= 0) {
2231
+ structuralBlocks.push({ start: blockStart, end: i, tag: blockTag });
2232
+ tagDepth = 0;
2233
+ blockStart = -1;
2234
+ blockTag = null;
2235
+ }
2236
+ }
2237
+ }
2238
+ // Create chunks from special blocks
2239
+ for (const block of specialBlocks) {
2240
+ const blockLines = lines.slice(block.start, block.end + 1);
2241
+ const blockContent = blockLines.join('\n');
2242
+ const startChar = lines.slice(0, block.start).join('\n').length + (block.start > 0 ? 1 : 0);
2243
+ chunks.push(this.createChunk(fileId, filePath, {
2244
+ chunkIndex: chunkIndex++,
2245
+ startLine: block.start + 1,
2246
+ endLine: block.end + 1,
2247
+ startChar,
2248
+ endChar: startChar + blockContent.length,
2249
+ content: blockContent,
2250
+ language,
2251
+ chunkType: block.type
2252
+ }));
2253
+ }
2254
+ // Create chunks from structural blocks
2255
+ for (const block of structuralBlocks) {
2256
+ const blockLines = lines.slice(block.start, block.end + 1);
2257
+ const blockContent = blockLines.join('\n');
2258
+ const startChar = lines.slice(0, block.start).join('\n').length + (block.start > 0 ? 1 : 0);
2259
+ chunks.push(this.createChunk(fileId, filePath, {
2260
+ chunkIndex: chunkIndex++,
2261
+ startLine: block.start + 1,
2262
+ endLine: block.end + 1,
2263
+ startChar,
2264
+ endChar: startChar + blockContent.length,
2265
+ content: blockContent,
2266
+ language,
2267
+ chunkType: 'structure',
2268
+ metadata: { tag: block.tag }
2269
+ }));
2270
+ }
2271
+ // If no chunks created, fall back to default line-based chunking
2272
+ if (chunks.length === 0) {
2273
+ const fallbackChunks = [];
2274
+ if (lines.length <= this.chunkSize) {
2275
+ fallbackChunks.push(this.createChunk(fileId, filePath, {
2276
+ chunkIndex: 0,
2277
+ startLine: 1,
2278
+ endLine: lines.length,
2279
+ startChar: 0,
2280
+ endChar: content.length,
2281
+ content,
2282
+ language,
2283
+ chunkType: 'markup'
2284
+ }));
2285
+ } else {
2286
+ let ci = 0;
2287
+ let currentLine = 0;
2288
+ while (currentLine < lines.length) {
2289
+ const startLine = currentLine;
2290
+ const endLine = Math.min(currentLine + this.chunkSize, lines.length);
2291
+ const chunkLines = lines.slice(startLine, endLine);
2292
+ const chunkContent = chunkLines.join('\n');
2293
+ const startChar = lines.slice(0, startLine).join('\n').length + (startLine > 0 ? 1 : 0);
2294
+ fallbackChunks.push(this.createChunk(fileId, filePath, {
2295
+ chunkIndex: ci++,
2296
+ startLine: startLine + 1,
2297
+ endLine,
2298
+ startChar,
2299
+ endChar: startChar + chunkContent.length,
2300
+ content: chunkContent,
2301
+ language,
2302
+ chunkType: 'markup'
2303
+ }));
2304
+ currentLine += this.chunkSize - this.chunkOverlap;
2305
+ }
2306
+ }
2307
+ return fallbackChunks;
2308
+ }
2309
+ // Sort chunks by start line
2310
+ chunks.sort((a, b) => a.startLine - b.startLine);
2311
+ return chunks;
2312
+ }
1158
2313
  createDefinition(fileId, filePath, data) {
1159
2314
  return {
1160
2315
  id: uuidv4(),