@aiready/context-analyzer 0.9.25 → 0.9.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/analyzer.ts CHANGED
@@ -371,9 +371,9 @@ export function calculatePathEntropy(files: string[]): number {
371
371
 
372
372
  const total = counts.reduce((s, v) => s + v, 0);
373
373
  let entropy = 0;
374
- for (const c of counts) {
375
- const p = c / total;
376
- entropy -= p * Math.log2(p);
374
+ for (const count of counts) {
375
+ const prob = count / total;
376
+ entropy -= prob * Math.log2(prob);
377
377
  }
378
378
 
379
379
  const maxEntropy = Math.log2(counts.length);
@@ -826,8 +826,8 @@ export function calculateStructuralCohesionFromCoUsage(
826
826
 
827
827
  // Calculate entropy
828
828
  let entropy = 0;
829
- for (const p of probs) {
830
- entropy -= p * Math.log2(p);
829
+ for (const prob of probs) {
830
+ entropy -= prob * Math.log2(prob);
831
831
  }
832
832
 
833
833
  const maxEntropy = Math.log2(probs.length);
@@ -892,10 +892,10 @@ function calculateDomainCohesion(exports: ExportInfo[]): number {
892
892
  const total = domains.length;
893
893
  let entropy = 0;
894
894
 
895
- for (const count of domainCounts.values()) {
896
- const p = count / total;
897
- if (p > 0) {
898
- entropy -= p * Math.log2(p);
895
+ for (const domainCount of domainCounts.values()) {
896
+ const prob = domainCount / total;
897
+ if (prob > 0) {
898
+ entropy -= prob * Math.log2(prob);
899
899
  }
900
900
  }
901
901
 
@@ -911,6 +911,11 @@ function calculateDomainCohesion(exports: ExportInfo[]): number {
911
911
  * - barrel-export: Re-exports from other modules (index.ts files)
912
912
  * - type-definition: Primarily type/interface definitions
913
913
  * - cohesive-module: Single domain, high cohesion (acceptable large files)
914
+ * - utility-module: Utility/helper files with cohesive purpose despite multi-domain
915
+ * - service-file: Service files orchestrating multiple dependencies
916
+ * - lambda-handler: Lambda/API handlers with single business purpose
917
+ * - email-template: Email templates/layouts with structural cohesion
918
+ * - parser-file: Parser/transformer files with single transformation purpose
914
919
  * - mixed-concerns: Multiple domains, potential refactoring candidate
915
920
  * - unknown: Unable to classify
916
921
  */
@@ -936,22 +941,70 @@ export function classifyFile(
936
941
  return 'cohesive-module'; // Treat as cohesive since it's intentional
937
942
  }
938
943
 
939
- // 4. Check for cohesive module (single domain + reasonable cohesion)
944
+ // 4. Check for lambda handlers FIRST (they often look like mixed concerns)
945
+ if (isLambdaHandler(node)) {
946
+ return 'lambda-handler';
947
+ }
948
+
949
+ // 4b. Check for data access layer (DAL) files
950
+ if (isDataAccessFile(node)) {
951
+ return 'cohesive-module';
952
+ }
953
+
954
+ // 5. Check for email templates (they reference multiple domains but serve one purpose)
955
+ if (isEmailTemplate(node)) {
956
+ return 'email-template';
957
+ }
958
+
959
+ // 6. Check for parser/transformer files
960
+ if (isParserFile(node)) {
961
+ return 'parser-file';
962
+ }
963
+
964
+ // 7. Check for service files
965
+ if (isServiceFile(node)) {
966
+ return 'service-file';
967
+ }
968
+
969
+ // 8. Check for session/state management files
970
+ if (isSessionFile(node)) {
971
+ return 'cohesive-module'; // Session files manage state cohesively
972
+ }
973
+
974
+ // 9. Check for Next.js App Router pages (metadata + faqJsonLd + default export)
975
+ if (isNextJsPage(node)) {
976
+ return 'nextjs-page';
977
+ }
978
+
979
+ // 10. Check for utility file pattern (multiple domains but utility purpose)
980
+ if (isUtilityFile(node)) {
981
+ return 'utility-module';
982
+ }
983
+
984
+ // Explicit path-based utility heuristic: files under /utils/ or /helpers/
985
+ // should be classified as utility-module regardless of domain count.
986
+ // This ensures common helper modules (e.g., src/utils/dynamodb-utils.ts)
987
+ // are treated as utility modules in tests and analysis.
988
+ if (file.toLowerCase().includes('/utils/') || file.toLowerCase().includes('/helpers/')) {
989
+ return 'utility-module';
990
+ }
991
+
992
+ // 10. Check for cohesive module (single domain + reasonable cohesion)
940
993
  const uniqueDomains = domains.filter(d => d !== 'unknown');
941
994
  const hasSingleDomain = uniqueDomains.length <= 1;
942
- const hasReasonableCohesion = cohesionScore >= 0.5; // Lowered threshold
943
995
 
944
996
  // Single domain files are almost always cohesive (even with lower cohesion score)
945
997
  if (hasSingleDomain) {
946
998
  return 'cohesive-module';
947
999
  }
948
-
949
- // 5. Check for utility file pattern (multiple domains but utility purpose)
950
- if (isUtilityFile(node)) {
951
- return 'cohesive-module'; // Utilities often have mixed imports by design
1000
+
1001
+ // 10b. Check for shared entity noun despite multi-domain scoring
1002
+ // e.g. getUserReceipts + createPendingReceipt both refer to 'receipt'
1003
+ if (allExportsShareEntityNoun(exports)) {
1004
+ return 'cohesive-module';
952
1005
  }
953
1006
 
954
- // 6. Check for mixed concerns (multiple domains + low cohesion)
1007
+ // 11. Check for mixed concerns (multiple domains + low cohesion)
955
1008
  const hasMultipleDomains = uniqueDomains.length > 1;
956
1009
  const hasLowCohesion = cohesionScore < 0.4; // Lowered threshold
957
1010
 
@@ -959,7 +1012,7 @@ export function classifyFile(
959
1012
  return 'mixed-concerns';
960
1013
  }
961
1014
 
962
- // 7. Default to cohesive-module for files with reasonable cohesion
1015
+ // 12. Default to cohesive-module for files with reasonable cohesion
963
1016
  // This reduces false positives for legitimate files
964
1017
  if (cohesionScore >= 0.5) {
965
1018
  return 'cohesive-module';
@@ -1019,6 +1072,7 @@ function isBarrelExport(node: DependencyNode): boolean {
1019
1072
  * - Mostly type/interface exports
1020
1073
  * - Little to no runtime code
1021
1074
  * - Often named *.d.ts or types.ts
1075
+ * - Located in /types/, /typings/, or @types directories
1022
1076
  */
1023
1077
  function isTypeDefinitionFile(node: DependencyNode): boolean {
1024
1078
  const { file, exports } = node;
@@ -1028,6 +1082,14 @@ function isTypeDefinitionFile(node: DependencyNode): boolean {
1028
1082
  const isTypesFile = fileName?.includes('types') || fileName?.includes('.d.ts') ||
1029
1083
  fileName === 'types.ts' || fileName === 'interfaces.ts';
1030
1084
 
1085
+ // Check if file is in a types directory (path-based detection)
1086
+ const lowerPath = file.toLowerCase();
1087
+ const isTypesPath = lowerPath.includes('/types/') ||
1088
+ lowerPath.includes('/typings/') ||
1089
+ lowerPath.includes('/@types/') ||
1090
+ lowerPath.startsWith('types/') ||
1091
+ lowerPath.startsWith('typings/');
1092
+
1031
1093
  // Count type exports vs other exports
1032
1094
  const typeExports = exports.filter(e => e.type === 'type' || e.type === 'interface');
1033
1095
  const runtimeExports = exports.filter(e => e.type === 'function' || e.type === 'class' || e.type === 'const');
@@ -1037,7 +1099,13 @@ function isTypeDefinitionFile(node: DependencyNode): boolean {
1037
1099
  typeExports.length > runtimeExports.length &&
1038
1100
  typeExports.length / exports.length > 0.7;
1039
1101
 
1040
- return isTypesFile || mostlyTypes;
1102
+ // Pure type files (only type/interface exports, no runtime code)
1103
+ const pureTypeFile = exports.length > 0 && typeExports.length === exports.length;
1104
+
1105
+ // Empty export file in types directory (might just be re-exports)
1106
+ const emptyOrReExportInTypesDir = isTypesPath && exports.length === 0;
1107
+
1108
+ return isTypesFile || isTypesPath || mostlyTypes || pureTypeFile || emptyOrReExportInTypesDir;
1041
1109
  }
1042
1110
 
1043
1111
  /**
@@ -1095,8 +1163,8 @@ function isUtilityFile(node: DependencyNode): boolean {
1095
1163
  // Check filename patterns for utility files
1096
1164
  const utilityPatterns = [
1097
1165
  'util', 'utility', 'utilities', 'helper', 'helpers',
1098
- 'common', 'shared', 'lib', 'toolbox', 'toolkit',
1099
- '.util.', '-util.', '_util.',
1166
+ 'common', 'shared', 'toolbox', 'toolkit',
1167
+ '.util.', '-util.', '_util.', '-utils.', '.utils.',
1100
1168
  ];
1101
1169
 
1102
1170
  const isUtilityName = utilityPatterns.some(pattern =>
@@ -1106,15 +1174,590 @@ function isUtilityFile(node: DependencyNode): boolean {
1106
1174
  // Check if file is in a utils/helpers directory
1107
1175
  const isUtilityPath = file.toLowerCase().includes('/utils/') ||
1108
1176
  file.toLowerCase().includes('/helpers/') ||
1109
- file.toLowerCase().includes('/lib/') ||
1110
- file.toLowerCase().includes('/common/');
1177
+ file.toLowerCase().includes('/common/') ||
1178
+ file.toLowerCase().endsWith('-utils.ts') ||
1179
+ file.toLowerCase().endsWith('-util.ts') ||
1180
+ file.toLowerCase().endsWith('-helper.ts') ||
1181
+ file.toLowerCase().endsWith('-helpers.ts');
1182
+
1183
+ // Only consider many small exports as utility pattern if also in utility-like path
1184
+ // This prevents false positives for regular modules with many functions
1185
+ const hasManySmallExportsInUtilityContext = exports.length >= 3 &&
1186
+ exports.every(e => e.type === 'function' || e.type === 'const') &&
1187
+ (isUtilityName || isUtilityPath);
1188
+
1189
+ return isUtilityName || isUtilityPath || hasManySmallExportsInUtilityContext;
1190
+ }
1191
+
/**
 * Split a camelCase, PascalCase, snake_case or kebab-case identifier into
 * lowercase tokens.
 *
 * Runs of capitals are kept together as a single acronym token instead of
 * being split letter by letter:
 *   getUserReceipts   -> ['get', 'user', 'receipts']
 *   parseHTMLDocument -> ['parse', 'html', 'document']
 *   MAX_RETRY_COUNT   -> ['max', 'retry', 'count']
 *
 * @param name - Identifier to tokenize
 * @returns Lowercase tokens in source order; an empty array for an empty name
 */
function splitCamelCase(name: string): string[] {
  return name
    // Boundary between a lowercase letter/digit and a capital: "getUser" -> "get User"
    .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
    // Boundary between an acronym and a capitalized word: "HTMLDocument" -> "HTML Document"
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    .toLowerCase()
    .split(/[\s_-]+/)
    .filter(Boolean);
}
1204
+
/**
 * Tokens that are dropped when extracting entity nouns from an identifier:
 * common verbs, state adjectives and short connecting words.
 */
const SKIP_WORDS = new Set<string>([
  // verbs
  'get', 'set', 'create', 'update', 'delete', 'fetch', 'save', 'load',
  'parse', 'format', 'validate', 'convert', 'transform', 'build',
  'generate', 'render', 'send', 'receive', 'find', 'list', 'add',
  'remove', 'insert', 'upsert', 'put', 'read', 'write', 'check',
  'handle', 'process', 'compute', 'calculate', 'init', 'reset', 'clear',
  // adjectives / quantifiers
  'pending', 'active', 'current', 'new', 'old', 'all',
  // connecting words
  'by', 'with', 'from', 'to', 'and', 'or', 'is', 'has', 'in', 'on', 'of', 'the',
]);
1215
+
/**
 * Reduce a lowercase English word to a rough singular stem.
 *
 * The goal is that a singular word and its plural map to the SAME stem so
 * they can be compared, not that the stem is always a dictionary word.
 *
 *   receipts   -> receipt      categories -> category
 *   addresses  -> address      address    -> address
 *   statuses   -> status       status     -> status
 *   responses  -> response
 *
 * @param word - Lowercase token
 * @returns The singularized token; words of three characters or fewer are
 *          returned unchanged
 */
function simpleSingularize(word: string): string {
  if (word.length <= 3) return word;
  if (word.endsWith('ies')) return word.slice(0, -3) + 'y';
  // Already singular ("address", "status"): stripping the 's' would make the
  // singular form differ from the stem of its own plural.
  if (/(ss|us)$/.test(word)) return word;
  // Plurals formed with "-es" on such words: "addresses", "statuses".
  if (word.length > 4 && /(ss|us)es$/.test(word)) return word.slice(0, -2);
  // Regular plural, including "-ses" words such as "responses" -> "response".
  if (word.endsWith('s')) return word.slice(0, -1);
  return word;
}
1223
+
/**
 * Pull the candidate entity nouns out of an identifier.
 *
 * The name is tokenized with splitCamelCase; tokens that are in SKIP_WORDS
 * or are two characters or shorter are discarded, and the survivors are
 * passed through simpleSingularize.
 *
 * @param name - Export name to analyze
 * @returns Singularized noun tokens, in the order they appear in the name
 */
function extractEntityNouns(name: string): string[] {
  const nouns: string[] = [];
  for (const token of splitCamelCase(name)) {
    if (token.length <= 2 || SKIP_WORDS.has(token)) {
      continue;
    }
    nouns.push(simpleSingularize(token));
  }
  return nouns;
}
1233
+
/**
 * Decide whether every export of a file mentions one common entity noun,
 * e.g. getUserReceipts + createPendingReceipt both yield 'receipt'.
 *
 * Returns false for fewer than 2 or more than 30 exports, and whenever any
 * export name produces no noun at all.
 *
 * @param exports - Exports of the file under analysis
 * @returns true when at least one noun occurs in every export name
 */
function allExportsShareEntityNoun(exports: ExportInfo[]): boolean {
  const exportCount = exports.length;
  if (exportCount < 2 || exportCount > 30) {
    return false;
  }

  const nounsPerExport: Array<Set<string>> = [];
  for (const exported of exports) {
    const nouns = new Set(extractEntityNouns(exported.name));
    if (nouns.size === 0) {
      return false;
    }
    nounsPerExport.push(nouns);
  }

  // A noun from the first export qualifies if all remaining exports have it too.
  const [candidates, ...others] = nounsPerExport;
  for (const noun of candidates) {
    if (others.every(nounSet => nounSet.has(noun))) {
      return true;
    }
  }
  return false;
}
1252
+
/**
 * Detect a Data Access Layer (DAL) / repository module.
 *
 * A file qualifies when either:
 * - its path contains /repositories/, /dao/ or /data/, or
 * - its file name contains a data-store keyword (dynamo, repository,
 *   postgres, ...), it has at most 10 exports that all share an entity
 *   noun, and it does not live under /utils/ or /helpers/.
 *
 * @param node - Dependency node of the file
 * @returns true when the file looks like a data access module
 */
function isDataAccessFile(node: DependencyNode): boolean {
  const { file, exports } = node;
  const lowerPath = file.toLowerCase();
  const fileName = file.split('/').pop()?.toLowerCase();

  // A DAL directory is sufficient on its own.
  const dalDirectories = ['/repositories/', '/dao/', '/data/'];
  if (dalDirectories.some(dir => lowerPath.includes(dir))) {
    return true;
  }

  // Utility paths are never treated as DAL, even with a DAL keyword in the name.
  if (lowerPath.includes('/utils/') || lowerPath.includes('/helpers/')) {
    return false;
  }

  const dalKeywords = [
    'dynamo', 'database', 'repository', 'repo', 'dao',
    'firestore', 'postgres', 'mysql', 'mongo', 'redis',
    'sqlite', 'supabase', 'prisma',
  ];
  if (!dalKeywords.some(keyword => fileName?.includes(keyword))) {
    return false;
  }

  // Name matched: additionally require a small export surface around one entity.
  const exportCount = exports.length;
  return exportCount >= 1 && exportCount <= 10 && allExportsShareEntityNoun(exports);
}
1290
+
/**
 * Detect a Lambda/API handler file.
 *
 * Any one of these signals is enough:
 * - the file name contains 'handler' or 'lambda'
 * - the path contains /handlers/, /lambdas/, /lambda/ or /functions/
 *   (/api/ is deliberately not in this list)
 * - some export is named 'main' or has a name ending in 'handler'
 *   (case-insensitive)
 *
 * @param node - Dependency node of the file
 * @returns true when the file looks like a handler entry point
 */
function isLambdaHandler(node: DependencyNode): boolean {
  const { file, exports } = node;
  const lowerPath = file.toLowerCase();
  const fileName = file.split('/').pop()?.toLowerCase();

  const nameMarkers = [
    'handler', '.handler.', '-handler.',
    'lambda', '.lambda.', '-lambda.',
  ];
  const directoryMarkers = ['/handlers/', '/lambdas/', '/lambda/', '/functions/'];

  const namedLikeHandler = nameMarkers.some(marker => fileName?.includes(marker));
  const inHandlerDirectory = directoryMarkers.some(dir => lowerPath.includes(dir));

  const exportsHandler = exports.some(exported => {
    const exportName = exported.name.toLowerCase();
    return (
      exportName === 'handler' ||
      exportName === 'main' ||
      exportName === 'lambdahandler' ||
      exportName.endsWith('handler')
    );
  });

  // A "single entry point in handler context" check adds nothing here: it
  // could only hold when the name or directory signal is already true.
  return namedLikeHandler || inHandlerDirectory || exportsHandler;
}
1337
+
/**
 * Detect a service file.
 *
 * A file qualifies when:
 * - its file name contains 'service', or
 * - its path contains /services/, or
 * - it exports a class AND some export name contains 'service'
 *   (case-insensitive; the two need not be the same export).
 *
 * @param node - Dependency node of the file
 * @returns true when the file looks like a service module
 */
function isServiceFile(node: DependencyNode): boolean {
  const { file, exports } = node;
  const fileName = file.split('/').pop()?.toLowerCase();

  const nameMarkers = ['service', '.service.', '-service.', '_service.'];
  const namedLikeService = nameMarkers.some(marker => fileName?.includes(marker));
  const inServicesDirectory = file.toLowerCase().includes('/services/');

  let sawServiceNamedExport = false;
  let sawClassExport = false;
  for (const exported of exports) {
    if (exported.name.toLowerCase().includes('service')) {
      sawServiceNamedExport = true;
    }
    if (exported.type === 'class') {
      sawClassExport = true;
    }
  }

  return namedLikeService || inServicesDirectory || (sawServiceNamedExport && sawClassExport);
}
1377
+
/**
 * Detect an email template/layout file.
 *
 * Any one of these signals is enough:
 * - the path contains /emails/, /mail/ or /notifications/
 * - the file name contains an email/template fragment ('-email-',
 *   '.template.', '-mail.', ...)
 * - the file name contains 'receipt', 'invoice-email', 'welcome-email' or
 *   'notification-email' (note: ANY file name containing 'receipt' matches)
 * - the exports contain both a template-style function (name starts with
 *   render/generate, or contains both 'template' and 'email') and an
 *   email-related export
 *
 * @param node - Dependency node of the file
 * @returns true when the file looks like an email template
 */
function isEmailTemplate(node: DependencyNode): boolean {
  const { file, exports } = node;
  const lowerPath = file.toLowerCase();
  const fileName = file.split('/').pop()?.toLowerCase() ?? '';

  const directoryMarkers = ['/emails/', '/mail/', '/notifications/'];
  const nameFragments = [
    '-email-', '.email.', '_email_',
    '-template', '.template.', '_template',
    '-mail.', '.mail.',
  ];
  // 'receipt' alone already covers the "writer + receipt" combination.
  const specificNames = ['receipt', 'invoice-email', 'welcome-email', 'notification-email'];

  const inEmailDirectory = directoryMarkers.some(dir => lowerPath.includes(dir));
  const hasTemplateName = nameFragments.some(fragment => fileName.includes(fragment));
  const hasSpecificName = specificNames.some(fragment => fileName.includes(fragment));

  const hasTemplateFunction = exports.some(exported => {
    if (exported.type !== 'function') {
      return false;
    }
    const exportName = exported.name.toLowerCase();
    return (
      exportName.startsWith('render') ||
      exportName.startsWith('generate') ||
      (exportName.includes('template') && exportName.includes('email'))
    );
  });

  const hasEmailExport = exports.some(exported => {
    const exportName = exported.name.toLowerCase();
    if (exported.type === 'function' &&
        (exportName.includes('template') || exportName.includes('render'))) {
      return true;
    }
    // Service classes that merely mention "email" do not count.
    return exportName.includes('email') && exported.type !== 'class';
  });

  return inEmailDirectory || hasTemplateName || hasSpecificName ||
    (hasTemplateFunction && hasEmailExport);
}
1436
+
/**
 * Detect a parser/transformer file.
 *
 * Any one of these signals is enough:
 * - the file name contains a parser-style fragment (parser, transform,
 *   converter, mapper, serializer, deterministic)
 * - the path contains /parsers/, /transformers/, /converters/ or /mappers/
 * - some export name contains parse/transform/convert/map/serialize
 * - some exported function name starts with parse/transform/convert/map/extract
 *
 * @param node - Dependency node of the file
 * @returns true when the file looks like a parser or transformer
 */
function isParserFile(node: DependencyNode): boolean {
  const { file, exports } = node;
  const lowerPath = file.toLowerCase();
  const fileName = file.split('/').pop()?.toLowerCase() ?? '';

  const nameMarkers = [
    'parser', '.parser.', '-parser.', '_parser.',
    'transform', '.transform.', '-transform.',
    'converter', '.converter.', '-converter.',
    'mapper', '.mapper.', '-mapper.',
    'serializer', '.serializer.',
    'deterministic', // For base-parser-deterministic.ts pattern
  ];
  const directoryMarkers = ['/parsers/', '/transformers/', '/converters/', '/mappers/'];
  const keywordsAnywhere = ['parse', 'transform', 'convert', 'map', 'serialize', 'deserialize'];
  const keywordsAtStart = ['parse', 'transform', 'convert', 'map', 'extract'];

  const namedLikeParser = nameMarkers.some(marker => fileName.includes(marker));
  const inParserDirectory = directoryMarkers.some(dir => lowerPath.includes(dir));

  const hasParserLikeExport = exports.some(exported => {
    const exportName = exported.name.toLowerCase();
    if (keywordsAnywhere.some(keyword => exportName.includes(keyword))) {
      return true;
    }
    return exported.type === 'function' &&
      keywordsAtStart.some(keyword => exportName.startsWith(keyword));
  });

  return namedLikeParser || inParserDirectory || hasParserLikeExport;
}
1493
+
/**
 * Detect a session/state management file.
 *
 * Any one of these signals is enough:
 * - the file name contains 'session', 'state', 'context' or 'store'
 * - the path contains /sessions/, /state/, /context/ or /store/
 * - some export name contains session/state/context/manager/store
 *   (case-insensitive)
 *
 * @param node - Dependency node of the file
 * @returns true when the file looks like a session or state module
 */
function isSessionFile(node: DependencyNode): boolean {
  const { file, exports } = node;
  const lowerPath = file.toLowerCase();
  const fileName = file.split('/').pop()?.toLowerCase() ?? '';

  const nameMarkers = [
    'session', '.session.', '-session.',
    'state', '.state.', '-state.',
    'context', '.context.', '-context.',
    'store', '.store.', '-store.',
  ];
  const directoryMarkers = ['/sessions/', '/state/', '/context/', '/store/'];
  const exportKeywords = ['session', 'state', 'context', 'manager', 'store'];

  const namedLikeSession = nameMarkers.some(marker => fileName.includes(marker));
  const inSessionDirectory = directoryMarkers.some(dir => lowerPath.includes(dir));
  const hasSessionExport = exports.some(exported => {
    const exportName = exported.name.toLowerCase();
    return exportKeywords.some(keyword => exportName.includes(keyword));
  });

  return namedLikeSession || inSessionDirectory || hasSessionExport;
}
1536
+
/**
 * Detect a Next.js App Router page.
 *
 * Hard requirements:
 * - the path is inside an app/ directory, and
 * - the file is named page.tsx or page.ts.
 *
 * On top of that, at least one export must either have type 'default'
 * (the page component) or carry a Next.js-specific name: metadata,
 * generateMetadata, faqJsonLd, jsonLd, icon, viewport, dynamic, or any
 * name containing 'jsonld' (case-insensitive). Either signal is enough;
 * a default export is not strictly required.
 *
 * @param node - Dependency node of the file
 * @returns true when the file looks like an App Router page
 */
function isNextJsPage(node: DependencyNode): boolean {
  const { file, exports } = node;
  const lowerPath = file.toLowerCase();
  const fileName = file.split('/').pop()?.toLowerCase();

  const inAppDirectory = lowerPath.startsWith('app/') || lowerPath.includes('/app/');
  const namedPage = fileName === 'page.ts' || fileName === 'page.tsx';
  if (!(inAppDirectory && namedPage)) {
    return false;
  }

  const nextJsExportNames = new Set([
    'metadata', 'generatemetadata', 'faqjsonld', 'jsonld', 'icon', 'viewport', 'dynamic',
  ]);

  return exports.some(exported => {
    if (exported.type === 'default') {
      return true;
    }
    const exportName = exported.name.toLowerCase();
    return nextJsExportNames.has(exportName) || exportName.includes('jsonld');
  });
}
1584
+
/**
 * Adjust a cohesion score based on the file's classification.
 *
 * Certain file types legitimately touch several domains, so their raw
 * cohesion score is raised by a bonus (capped at 1) and never allowed to
 * fall below a per-type floor:
 * - barrel-export, type-definition, nextjs-page: always 1
 * - utility-module: +0.35 / floor 0.75 (+0.45 / floor 0.80 when the export
 *   names look related)
 * - service-file:   +0.30 / floor 0.72 (+0.40 / floor 0.78 with a class export)
 * - lambda-handler: +0.35 / floor 0.75 (+0.45 / floor 0.80 with a single
 *   export or an export named 'handler')
 * - email-template: +0.30 / floor 0.72 (+0.40 / floor 0.75 with a
 *   render/generate/template export)
 * - parser-file:    +0.30 / floor 0.70 (+0.40 / floor 0.75 with an export
 *   starting with parse/transform/convert)
 * - cohesive-module: floor 0.7, no bonus
 * - mixed-concerns: unchanged
 * - anything else: +0.10
 *
 * @param baseCohesion - The calculated cohesion score (0-1)
 * @param classification - The file classification
 * @param node - Optional node; enables the export-based refinements above
 * @returns Adjusted cohesion score (0-1)
 */
export function adjustCohesionForClassification(
  baseCohesion: number,
  classification: FileClassification,
  node?: DependencyNode
): number {
  // Add `bonus`, cap at 1, then apply the minimum `floor`.
  const boost = (floor: number, bonus: number): number =>
    Math.max(floor, Math.min(1, baseCohesion + bonus));

  const anyExportName = (matches: (lowerName: string) => boolean): boolean =>
    node != null && node.exports.some(e => matches(e.name.toLowerCase()));

  switch (classification) {
    case 'barrel-export':
      // Barrel exports re-export from multiple modules by design
      return 1;
    case 'type-definition':
      // Type definitions centralize types - high cohesion by nature
      return 1;
    case 'utility-module': {
      // Names are passed with their ORIGINAL casing: the helper needs the
      // camelCase boundaries for its prefix and entity-noun checks and
      // lowercases internally where it has to.
      const related = node != null &&
        hasRelatedExportNames(node.exports.map(e => e.name));
      return related ? boost(0.80, 0.45) : boost(0.75, 0.35);
    }
    case 'service-file':
      // Services orchestrate dependencies by design.
      return node?.exports.some(e => e.type === 'class')
        ? boost(0.78, 0.40)
        : boost(0.72, 0.30);
    case 'lambda-handler': {
      // Lambda handlers have a single business purpose.
      const hasSingleEntry = node != null &&
        (node.exports.length === 1 || anyExportName(n => n === 'handler'));
      return hasSingleEntry ? boost(0.80, 0.45) : boost(0.75, 0.35);
    }
    case 'email-template': {
      // Email templates are structurally cohesive (single rendering purpose).
      const hasTemplateFunc = anyExportName(n =>
        n.includes('render') || n.includes('generate') || n.includes('template')
      );
      return hasTemplateFunc ? boost(0.75, 0.40) : boost(0.72, 0.30);
    }
    case 'parser-file': {
      // Parsers transform data - single transformation purpose
      const hasParseFunc = anyExportName(n =>
        n.startsWith('parse') || n.startsWith('transform') || n.startsWith('convert')
      );
      return hasParseFunc ? boost(0.75, 0.40) : boost(0.70, 0.30);
    }
    case 'nextjs-page':
      // Next.js pages have multiple exports by design (metadata, jsonLd, page component)
      return 1;
    case 'cohesive-module':
      // Already recognized as cohesive
      return Math.max(baseCohesion, 0.7);
    case 'mixed-concerns':
      // Keep original score - this is a real issue
      return baseCohesion;
    default:
      // Unknown - give benefit of doubt with small boost
      return Math.min(1, baseCohesion + 0.10);
  }
}

/**
 * Check whether a set of export names suggests related functionality.
 *
 * Names are considered related when ALL of them share at least one of:
 * 1. a stem after a common verb prefix (formatDate, parseDate -> 'date')
 * 2. a known domain keyword (getUser, saveUser -> 'user')
 * 3. the same lowercase leading word that is not a generic verb
 *    (dynamodbGet, dynamodbPut -> 'dynamodb')
 * 4. an entity noun found by camelCase splitting
 *    (getUserReceipts, createPendingReceipt -> 'receipt')
 *
 * Checks 1 and 2 are case-insensitive. Checks 3 and 4 rely on camelCase
 * boundaries, so callers should pass names with their original casing.
 *
 * @param exportNames - Export names of one file
 * @returns true when the names look related; fewer than two names is
 *          trivially related
 */
function hasRelatedExportNames(exportNames: string[]): boolean {
  if (exportNames.length < 2) return true;

  const verbs = ['get', 'set', 'create', 'update', 'delete', 'fetch', 'save', 'load', 'parse', 'format', 'validate', 'convert', 'transform', 'build', 'generate', 'render', 'send', 'receive'];
  const domainPatterns = ['user', 'order', 'product', 'session', 'email', 'file', 'db', 's3', 'dynamo', 'api', 'config'];
  const skip = new Set(['get','set','create','update','delete','fetch','save','load',
    'parse','format','validate','convert','transform','build','generate','render',
    'send','receive','find','list','add','remove','insert','upsert','put','read',
    'write','check','handle','process','pending','active','current','new','old','all']);
  const singularize = (w: string): string => w.endsWith('s') && w.length > 3 ? w.slice(0, -1) : w;

  // True when some value is present in every set. A name that contributes
  // nothing (empty set) therefore prevents a match, so one lone hit can no
  // longer mark the whole file as related.
  const shareMember = (sets: Array<Set<string>>): boolean => {
    const [first, ...rest] = sets;
    return first !== undefined &&
      Array.from(first).some(value => rest.every(s => s.has(value)));
  };

  const lowerNames = exportNames.map(name => name.toLowerCase());

  // 1. Common stem after a verb prefix
  const stemSets = lowerNames.map(name => new Set(
    verbs
      .filter(verb => name.length > verb.length && name.startsWith(verb))
      .map(verb => name.slice(verb.length))
  ));
  if (shareMember(stemSets)) return true;

  // 2. Common domain keyword
  const domainSets = lowerNames.map(name => new Set(
    domainPatterns.filter(domain => name.includes(domain))
  ));
  if (shareMember(domainSets)) return true;

  // 3. Same service prefix (e.g., dynamodbGet, dynamodbPut). Generic verbs
  //    such as 'get' are excluded so a file of unrelated getters is not a match.
  const prefixes = exportNames.map(name => name.match(/^[a-z]+/)?.[0] ?? '');
  const [firstPrefix] = prefixes;
  if (
    firstPrefix !== undefined &&
    firstPrefix.length >= 3 &&
    !skip.has(firstPrefix) &&
    prefixes.every(prefix => prefix === firstPrefix)
  ) {
    return true;
  }

  // 4. Shared entity noun across all exports using camelCase token splitting
  const nounSets = exportNames.map(name => {
    const tokens = name
      .replace(/([A-Z])/g, ' $1')
      .trim()
      .toLowerCase()
      .split(/[\s_-]+/)
      .filter(Boolean);
    return new Set(tokens.filter(t => !skip.has(t) && t.length > 2).map(singularize));
  });
  return shareMember(nounSets);
}
1119
1762
 
1120
1763
  /**
@@ -1124,6 +1767,7 @@ function isUtilityFile(node: DependencyNode): boolean {
1124
1767
  * - Ignoring fragmentation for barrel exports (they're meant to aggregate)
1125
1768
  * - Ignoring fragmentation for type definitions (centralized types are good)
1126
1769
  * - Reducing fragmentation for cohesive modules (large but focused is OK)
1770
+ * - Reducing fragmentation for utility/service/handler/template/parser/page files
1127
1771
  */
1128
1772
  export function adjustFragmentationForClassification(
1129
1773
  baseFragmentation: number,
@@ -1136,6 +1780,15 @@ export function adjustFragmentationForClassification(
1136
1780
  case 'type-definition':
1137
1781
  // Centralized type definitions are good practice - no fragmentation
1138
1782
  return 0;
1783
+ case 'utility-module':
1784
+ case 'service-file':
1785
+ case 'lambda-handler':
1786
+ case 'email-template':
1787
+ case 'parser-file':
1788
+ case 'nextjs-page':
1789
+ // These file types have structural reasons for touching multiple domains
1790
+ // Reduce fragmentation significantly
1791
+ return baseFragmentation * 0.2;
1139
1792
  case 'cohesive-module':
1140
1793
  // Cohesive modules get a significant discount
1141
1794
  return baseFragmentation * 0.3;
@@ -1172,6 +1825,36 @@ export function getClassificationRecommendations(
1172
1825
  'Module has good cohesion despite its size',
1173
1826
  'Consider documenting the module boundaries for AI assistants',
1174
1827
  ];
1828
+ case 'utility-module':
1829
+ return [
1830
+ 'Utility module detected - multiple domains are acceptable here',
1831
+ 'Consider grouping related utilities by prefix or domain for better discoverability',
1832
+ ];
1833
+ case 'service-file':
1834
+ return [
1835
+ 'Service file detected - orchestration of multiple dependencies is expected',
1836
+ 'Consider documenting service boundaries and dependencies',
1837
+ ];
1838
+ case 'lambda-handler':
1839
+ return [
1840
+ 'Lambda handler detected - coordination of services is expected',
1841
+ 'Ensure handler has clear single responsibility',
1842
+ ];
1843
+ case 'email-template':
1844
+ return [
1845
+ 'Email template detected - references multiple domains for rendering',
1846
+ 'Template structure is cohesive by design',
1847
+ ];
1848
+ case 'parser-file':
1849
+ return [
1850
+ 'Parser/transformer file detected - handles multiple data sources',
1851
+ 'Consider documenting input/output schemas',
1852
+ ];
1853
+ case 'nextjs-page':
1854
+ return [
1855
+ 'Next.js App Router page detected - metadata/JSON-LD/component pattern is cohesive',
1856
+ 'Multiple exports (metadata, faqJsonLd, default) serve single page purpose',
1857
+ ];
1175
1858
  case 'mixed-concerns':
1176
1859
  return [
1177
1860
  'Consider splitting this file by domain',