@sun-asterisk/sunlint 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/CHANGELOG.md +115 -1
  2. package/CONTRIBUTING.md +249 -605
  3. package/README.md +3 -4
  4. package/config/ci-cd.json +54 -0
  5. package/config/development.json +56 -0
  6. package/config/large-project.json +143 -0
  7. package/config/presets/all.json +0 -1
  8. package/config/release.json +70 -0
  9. package/config/rule-analysis-strategies.js +38 -3
  10. package/config/rules/enhanced-rules-registry.json +474 -1179
  11. package/config/rules/rules-registry-generated.json +3 -3
  12. package/core/cli-action-handler.js +24 -30
  13. package/core/cli-program.js +11 -3
  14. package/core/config-merger.js +29 -2
  15. package/core/enhanced-rules-registry.js +3 -2
  16. package/core/semantic-engine.js +129 -19
  17. package/core/semantic-rule-base.js +4 -2
  18. package/core/unified-rule-registry.js +1 -1
  19. package/docs/COMMAND-EXAMPLES.md +134 -0
  20. package/docs/LARGE-PROJECT-GUIDE.md +324 -0
  21. package/engines/heuristic-engine.js +135 -16
  22. package/integrations/eslint/plugin/index.js +0 -2
  23. package/integrations/eslint/plugin/rules/common/c003-no-vague-abbreviations.js +59 -1
  24. package/integrations/eslint/plugin/rules/common/c006-function-name-verb-noun.js +26 -1
  25. package/integrations/eslint/plugin/rules/common/c030-use-custom-error-classes.js +54 -19
  26. package/origin-rules/common-en.md +19 -15
  27. package/package.json +1 -1
  28. package/rules/common/C002_no_duplicate_code/analyzer.js +334 -36
  29. package/rules/common/C003_no_vague_abbreviations/analyzer.js +220 -35
  30. package/rules/common/C006_function_naming/analyzer.js +29 -3
  31. package/rules/common/C010_limit_block_nesting/analyzer.js +181 -337
  32. package/rules/common/C010_limit_block_nesting/config.json +64 -0
  33. package/rules/common/C010_limit_block_nesting/regex-based-analyzer.js +379 -0
  34. package/rules/common/C010_limit_block_nesting/symbol-based-analyzer.js +231 -0
  35. package/rules/common/C013_no_dead_code/analyzer.js +75 -177
  36. package/rules/common/C013_no_dead_code/config.json +61 -0
  37. package/rules/common/C013_no_dead_code/regex-based-analyzer.js +345 -0
  38. package/rules/common/C013_no_dead_code/symbol-based-analyzer.js +640 -0
  39. package/rules/common/C014_dependency_injection/analyzer.js +48 -313
  40. package/rules/common/C014_dependency_injection/config.json +26 -0
  41. package/rules/common/C014_dependency_injection/symbol-based-analyzer.js +751 -0
  42. package/rules/common/C017_constructor_logic/analyzer.js +254 -17
  43. package/rules/common/C017_constructor_logic/semantic-analyzer.js +340 -0
  44. package/rules/common/C018_no_throw_generic_error/analyzer.js +232 -0
  45. package/rules/common/C018_no_throw_generic_error/config.json +50 -0
  46. package/rules/common/C018_no_throw_generic_error/regex-based-analyzer.js +387 -0
  47. package/rules/common/C018_no_throw_generic_error/symbol-based-analyzer.js +314 -0
  48. package/rules/common/C019_log_level_usage/analyzer.js +110 -317
  49. package/rules/common/C019_log_level_usage/pattern-analyzer.js +88 -0
  50. package/rules/common/C019_log_level_usage/system-log-analyzer.js +1267 -0
  51. package/rules/common/C023_no_duplicate_variable/analyzer.js +180 -0
  52. package/rules/common/C023_no_duplicate_variable/config.json +50 -0
  53. package/rules/common/C023_no_duplicate_variable/symbol-based-analyzer.js +158 -0
  54. package/rules/common/C024_no_scatter_hardcoded_constants/analyzer.js +180 -0
  55. package/rules/common/C024_no_scatter_hardcoded_constants/config.json +50 -0
  56. package/rules/common/C024_no_scatter_hardcoded_constants/symbol-based-analyzer.js +181 -0
  57. package/rules/common/C030_use_custom_error_classes/analyzer.js +200 -0
  58. package/rules/common/C033_separate_service_repository/README.md +78 -0
  59. package/rules/common/C033_separate_service_repository/analyzer.js +160 -0
  60. package/rules/common/C033_separate_service_repository/config.json +50 -0
  61. package/rules/common/C033_separate_service_repository/regex-based-analyzer.js +585 -0
  62. package/rules/common/C033_separate_service_repository/symbol-based-analyzer.js +368 -0
  63. package/rules/common/C035_error_logging_context/STRATEGY.md +99 -0
  64. package/rules/common/C035_error_logging_context/analyzer.js +232 -0
  65. package/rules/common/C035_error_logging_context/config.json +54 -0
  66. package/rules/common/C035_error_logging_context/regex-based-analyzer.js +299 -0
  67. package/rules/common/C035_error_logging_context/symbol-based-analyzer.js +454 -0
  68. package/rules/common/C040_centralized_validation/analyzer.js +165 -0
  69. package/rules/common/C040_centralized_validation/config.json +46 -0
  70. package/rules/common/C040_centralized_validation/regex-based-analyzer.js +243 -0
  71. package/rules/common/C040_centralized_validation/symbol-based-analyzer.js +416 -0
  72. package/rules/common/{C076_single_test_behavior → C072_single_test_behavior}/analyzer.js +6 -6
  73. package/rules/common/C076_explicit_function_types/README.md +30 -0
  74. package/rules/common/C076_explicit_function_types/analyzer.js +172 -0
  75. package/rules/common/C076_explicit_function_types/config.json +15 -0
  76. package/rules/common/C076_explicit_function_types/semantic-analyzer.js +341 -0
  77. package/rules/index.js +6 -1
  78. package/rules/parser/rule-parser.js +13 -2
  79. package/rules/security/S005_no_origin_auth/README.md +226 -0
  80. package/rules/security/S005_no_origin_auth/analyzer.js +184 -0
  81. package/rules/security/S005_no_origin_auth/ast-analyzer.js +406 -0
  82. package/rules/security/S005_no_origin_auth/config.json +85 -0
  83. package/rules/security/S006_no_plaintext_recovery_codes/README.md +139 -0
  84. package/rules/security/S006_no_plaintext_recovery_codes/analyzer.js +306 -0
  85. package/rules/security/S006_no_plaintext_recovery_codes/config.json +48 -0
  86. package/rules/security/S007_no_plaintext_otp/README.md +198 -0
  87. package/rules/security/S007_no_plaintext_otp/analyzer.js +406 -0
  88. package/rules/security/S007_no_plaintext_otp/config.json +79 -0
  89. package/rules/security/S007_no_plaintext_otp/semantic-analyzer.js +609 -0
  90. package/rules/security/S007_no_plaintext_otp/semantic-config.json +195 -0
  91. package/rules/security/S007_no_plaintext_otp/semantic-wrapper.js +280 -0
  92. package/rules/security/S009_no_insecure_encryption/README.md +158 -0
  93. package/rules/security/S009_no_insecure_encryption/analyzer.js +319 -0
  94. package/rules/security/S009_no_insecure_encryption/config.json +55 -0
  95. package/rules/security/S010_no_insecure_encryption/README.md +224 -0
  96. package/rules/security/S010_no_insecure_encryption/analyzer.js +493 -0
  97. package/rules/security/S010_no_insecure_encryption/config.json +48 -0
  98. package/rules/security/S016_no_sensitive_querystring/STRATEGY.md +149 -0
  99. package/rules/security/S016_no_sensitive_querystring/analyzer.js +276 -0
  100. package/rules/security/S016_no_sensitive_querystring/config.json +127 -0
  101. package/rules/security/S016_no_sensitive_querystring/regex-based-analyzer.js +258 -0
  102. package/rules/security/S016_no_sensitive_querystring/symbol-based-analyzer.js +495 -0
  103. package/rules/security/S027_no_hardcoded_secrets/analyzer.js +180 -366
  104. package/rules/security/S027_no_hardcoded_secrets/categories.json +153 -0
  105. package/rules/security/S027_no_hardcoded_secrets/categorized-analyzer.js +250 -0
  106. package/rules/security/S048_no_current_password_in_reset/README.md +222 -0
  107. package/rules/security/S048_no_current_password_in_reset/analyzer.js +366 -0
  108. package/rules/security/S048_no_current_password_in_reset/config.json +48 -0
  109. package/rules/security/S055_content_type_validation/README.md +176 -0
  110. package/rules/security/S055_content_type_validation/analyzer.js +312 -0
  111. package/rules/security/S055_content_type_validation/config.json +48 -0
  112. package/rules/utils/rule-helpers.js +140 -1
  113. package/scripts/consolidate-config.js +116 -0
  114. package/scripts/prepare-release.sh +1 -1
  115. package/config/rules/rules-registry.json +0 -765
  116. package/docs/ESLINT-INTEGRATION-STRATEGY.md +0 -392
  117. package/docs/FUTURE_PACKAGES.md +0 -83
  118. package/docs/HEURISTIC_VS_AI.md +0 -113
  119. package/docs/PRODUCTION_DEPLOYMENT_ANALYSIS.md +0 -112
  120. package/docs/PRODUCTION_SIZE_IMPACT.md +0 -183
  121. package/docs/RELEASE_GUIDE.md +0 -230
  122. package/docs/STANDARDIZED-CATEGORY-FILTERING.md +0 -156
  123. package/integrations/eslint/plugin/rules/common/c076-single-behavior-per-test.js +0 -254
  124. package/rules/common/C006_function_naming/smart-analyzer.js +0 -503
@@ -79,6 +79,22 @@ const c006Rule = {
79
79
  ...commonVerbPrefixes,
80
80
  ...(options.allowedVerbs || [])
81
81
  ]);
82
+
83
+ // Generic/vague verbs that should be flagged even if they are technically verbs
84
+ const genericVerbs = new Set([
85
+ 'do', 'handle', 'process', 'manage', 'execute', 'work', 'stuff', 'thing', 'data'
86
+ ]);
87
+
88
+ function isGenericVerbUsage(name) {
89
+ // Check if the function name is exactly a generic verb or starts with generic verb + something generic
90
+ const genericPatterns = [
91
+ /^(do|handle|process|manage|execute)(Something|Stuff|Data|Info|Work|Thing|Items|Objects?)$/i,
92
+ /^(do|handle|process|manage|execute)$/i,
93
+ /^(do|handle|process|manage|execute)[A-Z].*$/i // Any pattern starting with generic verb + capital letter
94
+ ];
95
+
96
+ return genericPatterns.some(pattern => pattern.test(name));
97
+ }
82
98
 
83
99
  const allowConstructors = options.allowConstructors !== false;
84
100
 
@@ -142,7 +158,16 @@ const c006Rule = {
142
158
 
143
159
  // Check if it follows verb-noun pattern
144
160
  if (isVerbNounPattern(name)) {
145
- return; // Good! Follows the pattern
161
+ // But still check if it's using generic verbs that should be flagged
162
+ if (isGenericVerbUsage(name)) {
163
+ context.report({
164
+ node,
165
+ messageId: "notVerbNoun",
166
+ data: { name }
167
+ });
168
+ return;
169
+ }
170
+ return; // Good! Follows the pattern and not generic
146
171
  }
147
172
 
148
173
  // Check if it's likely a noun-only name
@@ -52,6 +52,10 @@ const c030Rule = {
52
52
  messages: {
53
53
  useCustomError: "Use custom error class instead of generic 'Error'. Consider using specific error types like ValidationError, NotFoundError, BusinessRuleError, etc. Vietnamese: 'Dùng custom error class thay vì Error generic'",
54
54
  useSpecificBuiltin: "Consider using a more specific built-in error type like TypeError, RangeError, or a custom error class. Vietnamese: 'Cân nhắc dùng built-in error cụ thể hơn hoặc custom error class'",
55
+ throwStringLiteral: "Use custom error classes instead of throwing string literals",
56
+ throwTemplateLiteral: "Use custom error classes instead of throwing template literals",
57
+ throwNumber: "Use custom error classes instead of throwing numbers",
58
+ throwVariable: "Use custom error classes instead of throwing variables",
55
59
  missingErrorCode: "Custom error class should include an error code property. Vietnamese: 'Custom error class nên có thuộc tính error code'",
56
60
  missingStatusCode: "HTTP-related error class should include a status code property. Vietnamese: 'Error class liên quan HTTP nên có thuộc tính status code'",
57
61
  preferCustomError: "Prefer custom error classes for better error categorization and handling. Vietnamese: 'Ưu tiên custom error classes để phân loại và xử lý lỗi tốt hơn'"
@@ -223,35 +227,66 @@ const c030Rule = {
223
227
  // Skip rethrow statements if allowed
224
228
  if (isRethrowStatement(node)) return;
225
229
 
226
- const errorClassName = getErrorClassName(node);
230
+ // Handle different throw argument types
231
+ if (node.argument) {
232
+ // Check for new Error(...) constructors
233
+ if (node.argument.type === 'NewExpression' &&
234
+ node.argument.callee &&
235
+ node.argument.callee.name === 'Error') {
236
+ context.report({
237
+ node: node.argument,
238
+ messageId: "useCustomError"
239
+ });
240
+ return;
241
+ }
227
242
 
228
- // Check for generic Error usage
229
- if (errorClassName === 'Error') {
230
- context.report({
231
- node: node.argument,
232
- messageId: "useCustomError"
233
- });
234
- return;
235
- }
243
+ // Check for other built-in error constructors
244
+ if (node.argument.type === 'NewExpression' &&
245
+ node.argument.callee &&
246
+ allowedBuiltinErrors.has(node.argument.callee.name)) {
247
+ if (['TypeError', 'RangeError'].includes(node.argument.callee.name)) {
248
+ context.report({
249
+ node: node.argument,
250
+ messageId: "useSpecificBuiltin"
251
+ });
252
+ }
253
+ return;
254
+ }
236
255
 
237
- // In strict mode, only custom errors are allowed
238
- if (strictMode && errorClassName) {
239
- if (!isCustomErrorClass(errorClassName) && !allowedBuiltinErrors.has(errorClassName)) {
256
+ // Check for throwing string literals
257
+ if (node.argument.type === 'Literal' && typeof node.argument.value === 'string') {
240
258
  context.report({
241
259
  node: node.argument,
242
- messageId: "preferCustomError"
260
+ messageId: "throwStringLiteral"
243
261
  });
262
+ return;
263
+ }
264
+
265
+ // Check for throwing template literals
266
+ if (node.argument.type === 'TemplateLiteral') {
267
+ context.report({
268
+ node: node.argument,
269
+ messageId: "throwTemplateLiteral"
270
+ });
271
+ return;
272
+ }
273
+
274
+ // Check for throwing numbers
275
+ if (node.argument.type === 'Literal' && typeof node.argument.value === 'number') {
276
+ context.report({
277
+ node: node.argument,
278
+ messageId: "throwNumber"
279
+ });
280
+ return;
244
281
  }
245
- }
246
282
 
247
- // Check for other built-in errors that could be more specific
248
- if (allowedBuiltinErrors.has(errorClassName) && !strictMode) {
249
- // Only suggest if it's a generic built-in error
250
- if (['TypeError', 'RangeError'].includes(errorClassName)) {
283
+ // Check for throwing variables (identifiers)
284
+ if (node.argument.type === 'Identifier') {
251
285
  context.report({
252
286
  node: node.argument,
253
- messageId: "useSpecificBuiltin"
287
+ messageId: "throwVariable"
254
288
  });
289
+ return;
255
290
  }
256
291
  }
257
292
  },
@@ -269,13 +269,17 @@
269
269
 
270
270
  ### 📘 Rule C019 – Do not use `error` log level for non-critical issues
271
271
 
272
- - **Objective**: Avoid noisy logs and false alarms; ensure meaningful log levels.
273
- - **Details**:
274
- - Use `info` or `warn` for recoverable or normal issues
275
- - Reserve `error` for critical failures that require immediate attention
276
- - Use `warn` for potential issues that don't crash the system
277
- - Use `info` for normal business flow events
278
- - Use `debug` for detailed information when troubleshooting
272
+ - **Objective**: Prevent noisy logs and false alarms; ensure consistent and meaningful log levels across the system.
273
+ - **Details**:
274
+ - Reserve `error` for critical failures that require immediate attention or system intervention.
275
+ - Use `warn` for potential issues that may affect functionality but don’t crash the system (e.g., retryable errors).
276
+ - Use `info` for normal business events (e.g., login, order success, expected validation failures).
277
+ - Use `debug` for detailed troubleshooting information; avoid excessive debug logs in production.
278
+ - Avoid using `error` for:
279
+ - Expected business cases (e.g., wrong password, expired card).
280
+ - Normal validation failures.
281
+ - Temporary, recoverable conditions (e.g., network retry).
282
+ - Additional goal: Ensure **logs exist at the right places with the right severity level**, avoiding both over-logging and missing critical logs.
279
283
  - **Applies to**: All languages
280
284
  - **Tools**: Log linter / Custom rule
281
285
  - **Principles**: CODE_QUALITY
@@ -675,7 +679,7 @@
675
679
 
676
680
  - **Objective**: Protect sensitive application data, avoid security risks, and comply with security standards. Exposing sensitive information can lead to serious security and privacy issues.
677
681
 
678
- - **Details**
682
+ - **Details**:
679
683
  - Use environment variables or separate config files to store secrets
680
684
  - Add secret files to `.gitignore` to prevent committing them
681
685
  - Use secret management tools such as Vault or AWS Secrets Manager
@@ -692,7 +696,7 @@
692
696
 
693
697
  - **Objective**: Ensure clarity and readability by making boolean variables self-explanatory. This naming convention improves code maintainability and documentation.
694
698
 
695
- - **Details**
699
+ - **Details**:
696
700
  - Use `is` for state attributes (e.g., `isActive`, `isEnabled`)
697
701
  - Use `has` for ownership (e.g., `hasPermission`, `hasChildren`)
698
702
  - Use `should` for decision flags (e.g., `shouldUpdate`, `shouldRetry`)
@@ -709,7 +713,7 @@
709
713
 
710
714
  - **Objective**: Ensure logging is done in a controlled and effective manner in production. Using `print` or `console.log` can lead to performance issues, security risks, and log management difficulties.
711
715
 
712
- - **Details**
716
+ - **Details**:
713
717
  - Use a dedicated logging framework instead of `print` or `console.log`
714
718
  - Set appropriate log levels for each environment (debug, info, warn, error)
715
719
  - Ensure logs contain useful metadata like timestamp, level, and context
@@ -726,7 +730,7 @@
726
730
 
727
731
  - **Objective**: Leverage well-tested, optimized, and community-maintained libraries to reduce bugs and improve development efficiency.
728
732
 
729
- - **Details**
733
+ - **Details**:
730
734
  - Prefer using standard language libraries
731
735
  - Use trusted and popular community libraries
732
736
  - Evaluate library compatibility and performance
@@ -743,7 +747,7 @@
743
747
 
744
748
  - **Objective**: Ensure APIs return appropriate HTTP status codes so clients can handle errors effectively. HTTP 500 should be reserved for unexpected system errors.
745
749
 
746
- - **Details**
750
+ - **Details**:
747
751
  - Use specific HTTP status codes based on error type:
748
752
  - 400 for validation errors
749
753
  - 401 for authentication failures
@@ -763,7 +767,7 @@
763
767
 
764
768
  - **Objective**: Keep code readable, maintainable, and efficient by avoiding the use of overly complex regular expressions in business-critical logic.
765
769
 
766
- - **Details**
770
+ - **Details**:
767
771
  - Move complex regex into constants or helper functions
768
772
  - Prefer string manipulation libraries over complex regex
769
773
  - Break down complex regex into simpler processing steps
@@ -781,7 +785,7 @@
781
785
 
782
786
  - **Objective**: Centralize retry logic to improve consistency, maintainability, and observability of error handling and retry mechanisms.
783
787
 
784
- - **Details**
788
+ - **Details**:
785
789
  - Create a dedicated utility class or service for retry logic
786
790
  - Centralize retry policy configuration (retry count, delay, backoff)
787
791
  - Use decorator pattern or AOP to apply retry logic
@@ -799,7 +803,7 @@
799
803
 
800
804
  - **Objective**: Maintain a clear layered architecture, ensuring logic and data flow are well-structured and maintainable.
801
805
 
802
- - **Details**
806
+ - **Details**:
803
807
  - Controllers should only call Services, not Repositories directly
804
808
  - Services should only call Repositories, not Controllers
805
809
  - Repositories should only handle data access, not call Services
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sun-asterisk/sunlint",
3
- "version": "1.3.0",
3
+ "version": "1.3.2",
4
4
  "description": "☀️ SunLint - Multi-language static analysis tool for code quality and security | Sun* Engineering Standards",
5
5
  "main": "cli.js",
6
6
  "bin": {
@@ -1,64 +1,362 @@
1
1
  /**
2
- * C002_no_duplicate_code - Heuristic Rule Analyzer
2
+ * C002_no_duplicate_code - Enhanced Regex-based Rule Analyzer
3
3
  * Category: coding
4
4
  *
5
- * TODO: Migrate logic from ESLint rule
6
- * ESLint rule: integrations/eslint/plugin/rules/coding/c002-no_duplicate_code.js
5
+ * Detects duplicate code blocks longer than specified threshold (default: 10 lines)
6
+ * Uses regex-based approach with proper comment filtering for multi-language support
7
7
  */
8
8
 
9
- const ts = require('typescript');
10
- const { PatternMatcher } = require('../../utils/pattern-matchers');
11
- const { RuleHelper } = require('../../utils/rule-helpers');
9
+ const fs = require('fs');
10
+ const path = require('path');
11
+ const { CommentDetector } = require('../../utils/rule-helpers');
12
12
 
13
13
  class C002_no_duplicate_codeAnalyzer {
14
14
  constructor(config = {}) {
15
- this.config = config;
16
- this.patternMatcher = new PatternMatcher();
17
- this.helper = new RuleHelper();
15
+ this.config = {
16
+ minLines: config.minLines || 5,
17
+ ignoreComments: config.ignoreComments !== false,
18
+ ignoreWhitespace: config.ignoreWhitespace !== false,
19
+ ignoreEmptyLines: config.ignoreEmptyLines !== false,
20
+ similarityThreshold: config.similarityThreshold || 0.80, // 80% similarity
21
+ ...config
22
+ };
23
+ this.codeBlocks = new Map();
24
+ this.reportedBlocks = new Set();
18
25
  }
19
26
 
20
27
  /**
21
- * Analyze code content for rule violations
22
- * @param {string} content - File content
23
- * @param {string} filePath - File path
24
- * @param {Object} context - Analysis context
28
+ * Analyze files for duplicate code violations (heuristic engine interface)
29
+ * @param {Array} files - Array of file paths
30
+ * @param {string} language - Programming language
31
+ * @param {Object} options - Analysis options
25
32
  * @returns {Array} Array of violations
26
33
  */
27
- analyze(content, filePath, context = {}) {
34
+ analyze(files, language, options = {}) {
28
35
  const violations = [];
29
36
 
30
- // TODO: Implement heuristic analysis logic
31
- // This should replicate the ESLint rule behavior using pattern matching
32
-
33
37
  try {
34
- // Example pattern-based analysis
35
- // const patterns = this.getViolationPatterns();
36
- // const matches = this.patternMatcher.findMatches(content, patterns);
37
- //
38
- // matches.forEach(match => {
39
- // violations.push(this.helper.createViolation({
40
- // ruleId: 'C002_no_duplicate_code',
41
- // message: 'Rule violation detected',
42
- // line: match.line,
43
- // column: match.column,
44
- // severity: 'error'
45
- // }));
46
- // });
38
+ console.log(`[C002 DEBUG] Analyzing ${files.length} files for duplicate code`);
39
+
40
+ // Reset state for new analysis
41
+ this.reset();
42
+
43
+ // Collect all code blocks from all files
44
+ const allCodeBlocks = [];
45
+
46
+ for (const filePath of files) {
47
+ console.log(`[C002 DEBUG] Processing file: ${filePath}`);
48
+ const content = this.readFileContent(filePath);
49
+ if (content) {
50
+ console.log(`[C002 DEBUG] File content length: ${content.length}`);
51
+ const codeBlocks = this.extractCodeBlocks(content, filePath);
52
+ console.log(`[C002 DEBUG] Extracted ${codeBlocks.length} code blocks from ${filePath}`);
53
+ codeBlocks.forEach((block, i) => {
54
+ console.log(`[C002 DEBUG] Block ${i}: ${block.type} at lines ${block.startLine}-${block.endLine} (${block.lineCount} lines)`);
55
+ });
56
+ allCodeBlocks.push(...codeBlocks);
57
+ }
58
+ }
59
+
60
+ console.log(`[C002 DEBUG] Total code blocks: ${allCodeBlocks.length}`);
61
+
62
+ // Find duplicates across all files
63
+ const duplicates = this.findDuplicates(allCodeBlocks);
64
+ console.log(`[C002 DEBUG] Found ${duplicates.length} duplicate groups`);
65
+
66
+ // Generate violations for each file
67
+ files.forEach(filePath => {
68
+ duplicates.forEach(duplicate => {
69
+ const fileViolations = this.createViolations(duplicate, filePath);
70
+ console.log(`[C002 DEBUG] Created ${fileViolations.length} violations for ${filePath}`);
71
+ violations.push(...fileViolations);
72
+ });
73
+ });
47
74
 
48
75
  } catch (error) {
49
- console.warn(`Error analyzing ${filePath} with C002_no_duplicate_code:`, error.message);
76
+ console.warn(`Error analyzing files with C002:`, error.message, error.stack);
50
77
  }
51
78
 
79
+ console.log(`[C002 DEBUG] Total violations: ${violations.length}`);
52
80
  return violations;
53
81
  }
54
82
 
55
83
  /**
56
- * Get violation patterns for this rule
57
- * @returns {Array} Array of patterns to match
84
+ * Read file content safely
85
+ * @param {string} filePath - Path to file
86
+ * @returns {string|null} File content or null if error
87
+ */
88
+ readFileContent(filePath) {
89
+ try {
90
+ return fs.readFileSync(filePath, 'utf8');
91
+ } catch (error) {
92
+ console.warn(`C002: Cannot read file ${filePath}:`, error.message);
93
+ return null;
94
+ }
95
+ }
96
+
97
+ /**
98
+ * Extract code blocks from content
99
+ * @param {string} content - File content
100
+ * @param {string} filePath - File path for context
101
+ * @returns {Array} Array of code blocks with metadata
102
+ */
103
+ extractCodeBlocks(content, filePath) {
104
+ const lines = content.split('\n');
105
+ const blocks = [];
106
+
107
+ // Extract function blocks, class methods, etc.
108
+ const functionPattern = /^\s*(function\s+\w+|const\s+\w+\s*=\s*(async\s+)?\([^)]*\)\s*=>|class\s+\w+|\w+\s*\([^)]*\)\s*:\s*[^{]*\{)/;
109
+ let currentBlock = null;
110
+ let braceLevel = 0;
111
+
112
+ lines.forEach((line, index) => {
113
+ const lineNum = index + 1;
114
+ const trimmedLine = line.trim();
115
+
116
+ // Use CommentDetector to filter out comments
117
+ const filteredLines = CommentDetector.filterCommentLines([line]);
118
+ if (filteredLines[0].isComment) {
119
+ return;
120
+ }
121
+
122
+ // Skip empty lines if configured
123
+ if (this.config.ignoreEmptyLines && !trimmedLine) {
124
+ return;
125
+ }
126
+
127
+ // Detect function/method/class start
128
+ if (functionPattern.test(trimmedLine)) {
129
+ currentBlock = {
130
+ startLine: lineNum,
131
+ lines: [line],
132
+ filePath: filePath,
133
+ type: this.detectBlockType(trimmedLine)
134
+ };
135
+ braceLevel = (line.match(/{/g) || []).length - (line.match(/}/g) || []).length;
136
+ } else if (currentBlock) {
137
+ currentBlock.lines.push(line);
138
+ braceLevel += (line.match(/{/g) || []).length - (line.match(/}/g) || []).length;
139
+
140
+ // End of block
141
+ if (braceLevel <= 0) {
142
+ currentBlock.endLine = lineNum;
143
+ currentBlock.lineCount = currentBlock.lines.length;
144
+
145
+ // Only consider blocks that meet minimum line requirement
146
+ if (currentBlock.lineCount >= this.config.minLines) {
147
+ currentBlock.normalizedCode = this.normalizeCode(currentBlock.lines.join('\n'));
148
+ if (currentBlock.normalizedCode.length > 20) { // Skip if too short after normalization
149
+ blocks.push(currentBlock);
150
+ }
151
+ }
152
+ currentBlock = null;
153
+ braceLevel = 0;
154
+ }
155
+ }
156
+ });
157
+
158
+ return blocks;
159
+ }
160
+
161
+ /**
162
+ * Detect the type of code block
163
+ * @param {string} line - First line of the block
164
+ * @returns {string} Block type
165
+ */
166
+ detectBlockType(line) {
167
+ if (line.includes('function')) return 'function';
168
+ if (line.includes('class')) return 'class';
169
+ if (line.includes('interface')) return 'interface';
170
+ if (line.includes('=>')) return 'arrow-function';
171
+ return 'method';
172
+ }
173
+
174
+ /**
175
+ * Normalize code for comparison
176
+ * @param {string} code - Raw code
177
+ * @returns {string} Normalized code
178
+ */
179
+ normalizeCode(code) {
180
+ let normalized = code;
181
+
182
+ if (this.config.ignoreComments) {
183
+ // Remove single line comments (// comments)
184
+ normalized = normalized.replace(/\/\/.*$/gm, '');
185
+ // Remove multi-line comments (/* comments */)
186
+ normalized = normalized.replace(/\/\*[\s\S]*?\*\//g, '');
187
+ // Remove # comments (for other languages)
188
+ normalized = normalized.replace(/#.*$/gm, '');
189
+ }
190
+
191
+ if (this.config.ignoreWhitespace) {
192
+ // Normalize whitespace
193
+ normalized = normalized
194
+ .replace(/\s+/g, ' ') // Multiple spaces to single space
195
+ .replace(/\s*{\s*/g, '{') // Remove spaces around braces
196
+ .replace(/\s*}\s*/g, '}')
197
+ .replace(/\s*;\s*/g, ';') // Remove spaces around semicolons
198
+ .replace(/\s*,\s*/g, ',') // Remove spaces around commas
199
+ .trim();
200
+ }
201
+
202
+ if (this.config.ignoreEmptyLines) {
203
+ // Remove empty lines
204
+ normalized = normalized
205
+ .split('\n')
206
+ .filter(line => line.trim().length > 0)
207
+ .join('\n');
208
+ }
209
+
210
+ console.log(`[C002 DEBUG] Normalized code block:
211
+ ${normalized}
212
+ ---`);
213
+
214
+ return normalized;
215
+ }
216
+
217
+ /**
218
+ * Find duplicate code blocks
219
+ * @param {Array} blocks - Array of code blocks
220
+ * @returns {Array} Array of duplicate groups
221
+ */
222
+ findDuplicates(blocks) {
223
+ const duplicateGroups = [];
224
+ const processedBlocks = new Set();
225
+
226
+ for (let i = 0; i < blocks.length; i++) {
227
+ if (processedBlocks.has(i)) continue;
228
+
229
+ const currentBlock = blocks[i];
230
+ const duplicates = [currentBlock];
231
+
232
+ for (let j = i + 1; j < blocks.length; j++) {
233
+ if (processedBlocks.has(j)) continue;
234
+
235
+ const otherBlock = blocks[j];
236
+ const similarity = this.calculateSimilarity(
237
+ currentBlock.normalizedCode,
238
+ otherBlock.normalizedCode
239
+ );
240
+
241
+ if (similarity >= this.config.similarityThreshold) {
242
+ duplicates.push(otherBlock);
243
+ processedBlocks.add(j);
244
+ }
245
+ }
246
+
247
+ if (duplicates.length > 1) {
248
+ duplicateGroups.push(duplicates);
249
+ processedBlocks.add(i);
250
+ }
251
+ }
252
+
253
+ return duplicateGroups;
254
+ }
255
+
256
+ /**
257
+ * Calculate similarity between two code strings
258
+ * @param {string} code1 - First code string
259
+ * @param {string} code2 - Second code string
260
+ * @returns {number} Similarity ratio (0-1)
261
+ */
262
+ calculateSimilarity(code1, code2) {
263
+ if (code1 === code2) return 1.0;
264
+
265
+ // Use Levenshtein distance for similarity calculation
266
+ const longer = code1.length > code2.length ? code1 : code2;
267
+ const shorter = code1.length > code2.length ? code2 : code1;
268
+
269
+ if (longer.length === 0) return 1.0;
270
+
271
+ const distance = this.levenshteinDistance(longer, shorter);
272
+ return (longer.length - distance) / longer.length;
273
+ }
274
+
275
+ /**
276
+ * Calculate Levenshtein distance between two strings
277
+ * @param {string} str1 - First string
278
+ * @param {string} str2 - Second string
279
+ * @returns {number} Edit distance
280
+ */
281
+ levenshteinDistance(str1, str2) {
282
+ const matrix = Array(str2.length + 1).fill().map(() => Array(str1.length + 1).fill(0));
283
+
284
+ for (let i = 0; i <= str1.length; i++) matrix[0][i] = i;
285
+ for (let j = 0; j <= str2.length; j++) matrix[j][0] = j;
286
+
287
+ for (let j = 1; j <= str2.length; j++) {
288
+ for (let i = 1; i <= str1.length; i++) {
289
+ const cost = str1[i - 1] === str2[j - 1] ? 0 : 1;
290
+ matrix[j][i] = Math.min(
291
+ matrix[j - 1][i] + 1, // deletion
292
+ matrix[j][i - 1] + 1, // insertion
293
+ matrix[j - 1][i - 1] + cost // substitution
294
+ );
295
+ }
296
+ }
297
+
298
+ return matrix[str2.length][str1.length];
299
+ }
300
+
301
+ /**
302
+ * Create violation objects for duplicate code
303
+ * @param {Array} duplicateGroup - Group of duplicate blocks
304
+ * @param {string} filePath - Current file path
305
+ * @returns {Array} Array of violation objects
306
+ */
307
+ createViolations(duplicateGroup, filePath) {
308
+ const violations = [];
309
+
310
+ duplicateGroup.forEach((block, index) => {
311
+ // Skip if not in current file or already reported
312
+ if (block.filePath !== filePath) return;
313
+
314
+ const blockId = `${block.filePath}:${block.startLine}-${block.endLine}`;
315
+ if (this.reportedBlocks.has(blockId)) return;
316
+
317
+ this.reportedBlocks.add(blockId);
318
+
319
+ violations.push({
320
+ ruleId: 'C002',
321
+ severity: 'error',
322
+ message: `Duplicate ${block.type} found (${block.lineCount} lines). Consider extracting into a shared function or module. Found ${duplicateGroup.length} similar blocks.`,
323
+ line: block.startLine,
324
+ column: 1,
325
+ endLine: block.endLine,
326
+ endColumn: 1,
327
+ filePath: filePath, // Add filePath field for engine compatibility
328
+ data: {
329
+ lineCount: block.lineCount,
330
+ blockType: block.type,
331
+ duplicateCount: duplicateGroup.length,
332
+ locations: duplicateGroup.map(b => `${path.basename(b.filePath)}:${b.startLine}-${b.endLine}`)
333
+ }
334
+ });
335
+ });
336
+
337
+ return violations;
338
+ }
339
+
340
+ /**
341
+ * Reset analyzer state for new analysis session
342
+ */
343
+ reset() {
344
+ this.codeBlocks.clear();
345
+ this.reportedBlocks.clear();
346
+ }
347
+
348
+ /**
349
+ * Get configuration for this rule
350
+ * @returns {Object} Configuration object
58
351
  */
59
- getViolationPatterns() {
60
- // TODO: Define patterns based on ESLint rule logic
61
- return [];
352
+ getConfig() {
353
+ return {
354
+ minLines: this.config.minLines,
355
+ ignoreComments: this.config.ignoreComments,
356
+ ignoreWhitespace: this.config.ignoreWhitespace,
357
+ ignoreEmptyLines: this.config.ignoreEmptyLines,
358
+ similarityThreshold: this.config.similarityThreshold
359
+ };
62
360
  }
63
361
  }
64
362