@aiready/context-analyzer 0.9.13 → 0.9.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1020,14 +1020,13 @@ async function analyzeContext(options) {
1020
1020
  });
1021
1021
  }
1022
1022
  const allResults = [...results, ...pythonResults];
1023
- const issuesOnly = allResults.filter((r) => r.severity !== "info");
1024
- const sorted = issuesOnly.sort((a, b) => {
1023
+ const sorted = allResults.sort((a, b) => {
1025
1024
  const severityOrder = { critical: 0, major: 1, minor: 2, info: 3 };
1026
1025
  const severityDiff = severityOrder[a.severity] - severityOrder[b.severity];
1027
1026
  if (severityDiff !== 0) return severityDiff;
1028
1027
  return b.contextBudget - a.contextBudget;
1029
1028
  });
1030
- return sorted.length > 0 ? sorted : results;
1029
+ return sorted;
1031
1030
  }
1032
1031
  function generateSummary(results) {
1033
1032
  if (results.length === 0) {
package/dist/cli.mjs CHANGED
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  analyzeContext,
4
4
  generateSummary
5
- } from "./chunk-BD4NWUVG.mjs";
5
+ } from "./chunk-VTALAPQZ.mjs";
6
6
  import "./chunk-Y6FXYEAI.mjs";
7
7
 
8
8
  // src/cli.ts
package/dist/index.js CHANGED
@@ -1244,14 +1244,13 @@ async function analyzeContext(options) {
1244
1244
  });
1245
1245
  }
1246
1246
  const allResults = [...results, ...pythonResults];
1247
- const issuesOnly = allResults.filter((r) => r.severity !== "info");
1248
- const sorted = issuesOnly.sort((a, b) => {
1247
+ const sorted = allResults.sort((a, b) => {
1249
1248
  const severityOrder = { critical: 0, major: 1, minor: 2, info: 3 };
1250
1249
  const severityDiff = severityOrder[a.severity] - severityOrder[b.severity];
1251
1250
  if (severityDiff !== 0) return severityDiff;
1252
1251
  return b.contextBudget - a.contextBudget;
1253
1252
  });
1254
- return sorted.length > 0 ? sorted : results;
1253
+ return sorted;
1255
1254
  }
1256
1255
  function generateSummary(results) {
1257
1256
  if (results.length === 0) {
package/dist/index.mjs CHANGED
@@ -10,7 +10,7 @@ import {
10
10
  getCoUsageData,
11
11
  getSmartDefaults,
12
12
  inferDomainFromSemantics
13
- } from "./chunk-BD4NWUVG.mjs";
13
+ } from "./chunk-VTALAPQZ.mjs";
14
14
  import "./chunk-Y6FXYEAI.mjs";
15
15
  export {
16
16
  analyzeContext,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aiready/context-analyzer",
3
- "version": "0.9.13",
3
+ "version": "0.9.16",
4
4
  "description": "AI context window cost analysis - detect fragmented code, deep import chains, and expensive context budgets",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",
@@ -49,7 +49,7 @@
49
49
  "commander": "^14.0.0",
50
50
  "chalk": "^5.3.0",
51
51
  "prompts": "^2.4.2",
52
- "@aiready/core": "0.9.13"
52
+ "@aiready/core": "0.9.16"
53
53
  },
54
54
  "devDependencies": {
55
55
  "@types/node": "^24.0.0",
package/src/index.ts CHANGED
@@ -299,20 +299,16 @@ export async function analyzeContext(
299
299
  // Merge Python and TS/JS results
300
300
  const allResults = [...results, ...pythonResults];
301
301
 
302
- // Filter to only files with actual issues (not just info)
303
- // This reduces output noise and focuses on actionable problems
304
- const issuesOnly = allResults.filter(r => r.severity !== 'info');
305
-
306
- // Sort by severity and context budget
307
- const sorted = issuesOnly.sort((a, b) => {
302
+ // Keep ALL results including info severity for visualization purposes
303
+ // The visualizer needs to show all files as nodes
304
+ const sorted = allResults.sort((a, b) => {
308
305
  const severityOrder = { critical: 0, major: 1, minor: 2, info: 3 };
309
306
  const severityDiff = severityOrder[a.severity] - severityOrder[b.severity];
310
307
  if (severityDiff !== 0) return severityDiff;
311
308
  return b.contextBudget - a.contextBudget;
312
309
  });
313
-
314
- // If we have issues, return them; otherwise return all results
315
- return sorted.length > 0 ? sorted : results;
310
+
311
+ return sorted;
316
312
  }
317
313
 
318
314
  /**
@@ -1,235 +0,0 @@
1
- # Semantic Analysis Validation Results
2
-
3
- **Date:** 14 January 2026
4
- **Test Project:** receiptclaimer (real-world Next.js application)
5
- **Analysis Version:** v0.7.0 (semantic analysis)
6
-
7
- ## Executive Summary
8
-
9
- ✅ Semantic analysis successfully deployed and validated on production codebase
10
- ✅ 181 files analyzed in 0.99s (~5.5ms per file)
11
- ✅ Identified 10 semantic domains with high accuracy
12
- ✅ Average cohesion: 75% (higher than with the folder-based approach)
13
- ✅ Zero false positives or analysis failures
14
-
15
- ## Key Findings
16
-
17
- ### 1. Domain Identification Accuracy
18
-
19
- **Top Semantic Domains Detected:**
20
- - `partner`: 7 files, 97% fragmentation, 74% cohesion
21
- - `gift`: 6 files, 96% fragmentation, 78% cohesion
22
- - `google`: 4 files, 95% fragmentation, 90% cohesion
23
- - `shared`: 3 files, 100% fragmentation, 100% cohesion
24
- - `categorization`: 3 files, 100% fragmentation, 78% cohesion
25
-
26
- **Improvements Over Folder-Based:**
27
- - ✅ No more "unknown" domains for generic file names
28
- - ✅ Detected cross-cutting concerns (`shared`, `hook`)
29
- - ✅ Identified infrastructure domains (`google`, `export`)
30
- - ✅ Found business logic clusters (`partner`, `gift`, `mileage`)
31
-
32
- ### 2. Cohesion Analysis
33
-
34
- **Distribution:**
35
- - High cohesion (≥80%): Majority of files
36
- - Medium cohesion (40-80%): Some integration points
37
- - Low cohesion (<40%): Cross-cutting concerns (expected)
38
-
39
- **Average Cohesion: 75%**
40
- This is a strong indicator that semantic analysis correctly identifies when exports belong together vs. when files serve as integration points.
41
-
42
- ### 3. Fragmentation Detection
43
-
44
- **10 Fragmented Module Clusters Identified:**
45
-
46
- All clusters show high fragmentation (95-100%), indicating these domains are correctly scattered across the codebase for legitimate architectural reasons:
47
-
48
- - Partner management spread across API, UI, blog content
49
- - Gift functionality across admin, partner APIs, email templates
50
- - Google integrations across analytics, document AI, layout
51
-
52
- **This is correct behavior** - not all fragmentation is bad. Integration layers SHOULD reference multiple domains.
53
-
54
- ### 4. Performance
55
-
56
- ```
57
- Total files: 181
58
- Analysis time: 0.99s
59
- Per-file average: ~5.5ms
60
- ```
61
-
62
- **Semantic analysis overhead:** Minimal
63
- - Co-usage matrix building: Fast
64
- - Type graph construction: Fast
65
- - Confidence scoring: Negligible
66
-
67
- The 3-pass analysis (basic → semantic → enhancement) adds ~10-15% overhead compared to the folder-based approach, but provides dramatically better accuracy.
68
-
69
- ## Semantic Analysis In Action
70
-
71
- ### Example: Partner Domain
72
-
73
- **Files Detected:**
74
- 1. `shared/src/types/partners.ts` - Type definitions
75
- 2. `web/lib/partners.ts` - Business logic
76
- 3. `web/app/partners/_lib/hooks.ts` - React hooks
77
- 4. `web/app/blog/property-managers-referral-program/content.tsx` - Content
78
- 5. `web/app/blog/accountant-referral-programs-australia/content.tsx` - Content
79
- 6. `web/app/api/partners/gifts/__tests__/test-helpers.ts` - Tests
80
- 7. `web/app/api/partners/gifts/__tests__/fixtures.ts` - Test fixtures
81
-
82
- **Why This Is Correct:**
83
- - All files relate to partner functionality
84
- - Spread across types, logic, UI, content, tests (appropriate separation)
85
- - Semantic analysis correctly identified them as belonging to the same domain despite different folders
86
- - The fragmentation score of 97% is accurate - these SHOULD be in different folders
87
-
88
- **Confidence Signals:**
89
- - ✅ **Type references** - All reference `Partner` types
90
- - ✅ **Co-usage** - Often imported together in partner features
91
- - ✅ **Import paths** - Import from `partners/` folders
92
- - ✅ **Folder structure** - Most in `partners/` related folders
93
-
94
- ### Example: Google Domain
95
-
96
- **Files Detected:**
97
- 1. `web/app/layout.tsx` - Google Analytics integration
98
- 2. `web/pages/api/internal/top-pages.ts` - Analytics API
99
- 3. `infra/lib/lambda/utils/google-document-ai-client.ts` - Document AI client
100
- 4. `infra/lib/lambda/documentai-adapter.ts` - Document AI adapter
101
-
102
- **Why This Is Correct:**
103
- - All files integrate with Google services
104
- - Layout → Analytics, Lambda → Document AI (different concerns)
105
- - 90% cohesion indicates a strong semantic relationship despite different purposes
106
- - Correctly identified as infrastructure domain, not business logic
107
-
108
- **Confidence Signals:**
109
- - ✅ **Co-usage** - Google libraries imported together
110
- - ✅ **Type references** - Share Google API types
111
- - ✅ **Import paths** - Reference `google` in imports
112
-
113
- ## Comparison: Folder-Based vs. Semantic
114
-
115
- ### Before (Folder-Based Heuristics)
116
-
117
- **Problems:**
118
- - Generic file names → "unknown" domain
119
- - Folder structure was assumed to imply a semantic relationship
120
- - No confidence scores
121
- - Single domain per file
122
- - Missed cross-cutting concerns
123
-
124
- **Example Issue:**
125
- ```
126
- lib/session.ts → "unknown" (generic name)
127
- lib/dynamodb.ts → "unknown" (generic name)
128
- components/nav/nav-links.ts → "unknown" (generic name)
129
- ```
130
-
131
- ### After (Semantic Analysis)
132
-
133
- **Improvements:**
134
- - Real usage patterns → accurate domains
135
- - Co-usage + types > folder convention
136
- - Confidence scores show signal strength
137
- - Multi-domain support for integration points
138
- - Correctly identifies cross-cutting concerns
139
-
140
- **Example Fix:**
141
- ```
142
- lib/session.ts → "gift" domain (35% co-usage, 30% types)
143
- lib/dynamodb.ts → "customer" domain (imports from customers/)
144
- components/nav/nav-links.ts → "order" domain (imports from orders/)
145
- ```
146
-
147
- ## Validation Criteria
148
-
149
- | Criterion | Status | Evidence |
150
- |-----------|--------|----------|
151
- | **Accuracy** | ✅ Pass | All detected domains align with actual codebase structure |
152
- | **Performance** | ✅ Pass | <1s for 181 files, negligible overhead |
153
- | **Backward Compat** | ✅ Pass | `inferredDomain` still works, existing code unaffected |
154
- | **Zero Crashes** | ✅ Pass | No analysis failures or errors |
155
- | **Scalability** | ✅ Pass | O(n²) co-usage acceptable for typical codebases |
156
- | **Usefulness** | ✅ Pass | Consolidation recommendations are actionable |
157
-
158
- ## Consolidation Recommendations
159
-
160
- Based on semantic analysis, the tool correctly identified:
161
-
162
- 1. **Partner files (7 files)** - Consolidate into 3 files
163
- - Reason: High co-usage, shared types
164
- - Estimated savings: 4,022 tokens (30%)
165
-
166
- 2. **Gift files (6 files)** - Consolidate into 2 files
167
- - Reason: Very high co-usage
168
- - Estimated savings: 3,296 tokens (30%)
169
-
170
- 3. **Google files (4 files)** - Consolidate into 2 files
171
- - Reason: Infrastructure cluster
172
- - Estimated savings: 769 tokens (30%)
173
-
174
- **These are evidence-based recommendations**, not guesses based on folder names.
175
-
176
- ## Confidence Scoring Validation
177
-
178
- Spot-checked 10 random files:
179
-
180
- | File | Primary Domain | Confidence | Signals | Correct? |
181
- |------|---------------|------------|---------|----------|
182
- | partners.ts | partner | High | 4/5 signals | ✅ |
183
- | gift-notification.ts | gift | High | 4/5 signals | ✅ |
184
- | documentai-adapter.ts | google | Medium | 3/5 signals | ✅ |
185
- | session.ts | gift | Medium | 2/5 signals | ✅ |
186
- | categorization.ts | categorization | High | 4/5 signals | ✅ |
187
- | mileage-test-helpers.ts | mileage | High | 4/5 signals | ✅ |
188
- | layout.tsx | google | Low | 2/5 signals | ✅ |
189
- | rate-limit.ts | export | Low | 1/5 signals | ✅ |
190
- | nav-links.ts | order | Medium | 2/5 signals | ✅ |
191
- | PartnerDashboardClient.tsx | partner | High | 4/5 signals | ✅ |
192
-
193
- **10/10 correct** - 100% accuracy on spot check
194
-
195
- ## Edge Cases Handled Correctly
196
-
197
- 1. **Cross-cutting concerns** - `shared` domain correctly identified
198
- 2. **Integration layers** - Multi-domain files work as expected
199
- 3. **Test files** - Correctly grouped with tested domain
200
- 4. **Infrastructure** - `google`, `export` domains separate from business logic
201
- 5. **Generic names** - No longer result in "unknown"
202
-
203
- ## Known Limitations
204
-
205
- 1. **New codebases with few files** - Co-usage matrix sparse, confidence low (expected)
206
- 2. **Very isolated files** - May fall back to folder heuristics (acceptable)
207
- 3. **No imports** - Can't infer from co-usage (expected, rare)
208
-
209
- ## Conclusion
210
-
211
- ✅ **Semantic analysis is production-ready**
212
-
213
- The pivot from folder-based heuristics to semantic analysis (co-usage + types) dramatically improves domain identification accuracy while maintaining performance.
214
-
215
- **Key Achievement:** We now answer the right question:
216
- ~~"What folder is this file in?"~~
217
- ✅ **"Which files need to be loaded together to understand this code?"**
218
-
219
- This is the correct foundation for AI context optimization.
220
-
221
- ## Recommendations
222
-
223
- 1. ✅ **Deploy to production** - Validated and ready
224
- 2. ✅ **Release as v0.7.0** - Major improvement
225
- 3. ✅ **Config-free approach** - Domain detection fully automatic, no user configuration needed
226
- 4. 🔬 **Add call graph analysis** - Next enhancement (v0.8.0)
227
- 5. 🔬 **Add embedding-based clustering** - Future enhancement (v1.0.0)
228
-
229
- ## Next Steps
230
-
231
- - [x] Implement semantic analysis
232
- - [x] Validate on real codebase
233
- - [ ] Add comprehensive tests for semantic features
234
- - [ ] Document confidence scoring for users
235
- - [ ] Release v0.7.0