code2llm 0.5.18__tar.gz → 0.5.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {code2llm-0.5.18 → code2llm-0.5.19}/PKG-INFO +38 -21
  2. {code2llm-0.5.18 → code2llm-0.5.19}/README.md +37 -20
  3. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/__init__.py +1 -1
  4. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/cli.py +32 -12
  5. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/cli_exports.py +38 -17
  6. code2llm-0.5.19/code2llm/core/large_repo.py +375 -0
  7. code2llm-0.5.19/code2llm/core/toon_size_manager.py +247 -0
  8. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/nlp/__init__.py +1 -1
  9. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm.egg-info/PKG-INFO +38 -21
  10. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm.egg-info/SOURCES.txt +1 -0
  11. {code2llm-0.5.18 → code2llm-0.5.19}/pyproject.toml +1 -1
  12. code2llm-0.5.18/code2llm/core/large_repo.py +0 -258
  13. {code2llm-0.5.18 → code2llm-0.5.19}/LICENSE +0 -0
  14. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/__main__.py +0 -0
  15. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/analysis/__init__.py +0 -0
  16. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/analysis/call_graph.py +0 -0
  17. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/analysis/cfg.py +0 -0
  18. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/analysis/coupling.py +0 -0
  19. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/analysis/data_analysis.py +0 -0
  20. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/analysis/dfg.py +0 -0
  21. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/analysis/pipeline_detector.py +0 -0
  22. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/analysis/side_effects.py +0 -0
  23. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/analysis/smells.py +0 -0
  24. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/analysis/type_inference.py +0 -0
  25. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/__init__.py +0 -0
  26. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/analyzer.py +0 -0
  27. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/config.py +0 -0
  28. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/core/__init__.py +0 -0
  29. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/core/file_analyzer.py +0 -0
  30. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/core/file_cache.py +0 -0
  31. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/core/file_filter.py +0 -0
  32. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/core/refactoring.py +0 -0
  33. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/models.py +0 -0
  34. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/streaming/__init__.py +0 -0
  35. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/streaming/cache.py +0 -0
  36. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/streaming/incremental.py +0 -0
  37. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/streaming/prioritizer.py +0 -0
  38. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/streaming/scanner.py +0 -0
  39. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/streaming/strategies.py +0 -0
  40. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/core/streaming_analyzer.py +0 -0
  41. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/__init__.py +0 -0
  42. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/base.py +0 -0
  43. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/context_exporter.py +0 -0
  44. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/evolution_exporter.py +0 -0
  45. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/flow_constants.py +0 -0
  46. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/flow_exporter.py +0 -0
  47. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/flow_renderer.py +0 -0
  48. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/json_exporter.py +0 -0
  49. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/llm_exporter.py +0 -0
  50. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/map_exporter.py +0 -0
  51. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/mermaid_exporter.py +0 -0
  52. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/readme_exporter.py +0 -0
  53. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/toon/__init__.py +0 -0
  54. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/toon/helpers.py +0 -0
  55. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/toon/metrics.py +0 -0
  56. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/toon/module_detail.py +0 -0
  57. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/toon/renderer.py +0 -0
  58. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/toon.py +0 -0
  59. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/exporters/yaml_exporter.py +0 -0
  60. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/generators/__init__.py +0 -0
  61. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/generators/llm_flow.py +0 -0
  62. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/generators/llm_task.py +0 -0
  63. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/generators/mermaid.py +0 -0
  64. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/nlp/config.py +0 -0
  65. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/nlp/entity_resolution.py +0 -0
  66. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/nlp/intent_matching.py +0 -0
  67. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/nlp/normalization.py +0 -0
  68. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/nlp/pipeline.py +0 -0
  69. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/patterns/__init__.py +0 -0
  70. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/patterns/detector.py +0 -0
  71. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/refactor/__init__.py +0 -0
  72. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm/refactor/prompt_engine.py +0 -0
  73. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm.egg-info/dependency_links.txt +0 -0
  74. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm.egg-info/entry_points.txt +0 -0
  75. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm.egg-info/requires.txt +0 -0
  76. {code2llm-0.5.18 → code2llm-0.5.19}/code2llm.egg-info/top_level.txt +0 -0
  77. {code2llm-0.5.18 → code2llm-0.5.19}/setup.cfg +0 -0
  78. {code2llm-0.5.18 → code2llm-0.5.19}/setup.py +0 -0
  79. {code2llm-0.5.18 → code2llm-0.5.19}/tests/test_advanced_analysis.py +0 -0
  80. {code2llm-0.5.18 → code2llm-0.5.19}/tests/test_analyzer.py +0 -0
  81. {code2llm-0.5.18 → code2llm-0.5.19}/tests/test_deep_analysis.py +0 -0
  82. {code2llm-0.5.18 → code2llm-0.5.19}/tests/test_edge_cases.py +0 -0
  83. {code2llm-0.5.18 → code2llm-0.5.19}/tests/test_flow_exporter.py +0 -0
  84. {code2llm-0.5.18 → code2llm-0.5.19}/tests/test_format_quality.py +0 -0
  85. {code2llm-0.5.18 → code2llm-0.5.19}/tests/test_nlp_pipeline.py +0 -0
  86. {code2llm-0.5.18 → code2llm-0.5.19}/tests/test_pipeline_detector.py +0 -0
  87. {code2llm-0.5.18 → code2llm-0.5.19}/tests/test_prompt_engine.py +0 -0
  88. {code2llm-0.5.18 → code2llm-0.5.19}/tests/test_prompt_txt.py +0 -0
  89. {code2llm-0.5.18 → code2llm-0.5.19}/tests/test_refactoring_engine.py +0 -0
  90. {code2llm-0.5.18 → code2llm-0.5.19}/tests/test_toon_v2.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code2llm
3
- Version: 0.5.18
3
+ Version: 0.5.19
4
4
  Summary: High-performance Python code flow analysis with optimized TOON format - CFG, DFG, call graphs, and intelligent code queries
5
5
  Home-page: https://github.com/wronai/stts
6
6
  Author: STTS Project
@@ -96,46 +96,63 @@ code2llm ./ -f all --max-memory 500
96
96
  code2llm ./ -f all --no-png
97
97
  ```
98
98
 
99
- ### Large Repository Analysis (Chunking)
100
- For repositories >100 files, automatic chunking splits analysis into smaller subprojects:
99
+ ### Large Repository Analysis (Hierarchical Chunking)
100
+ For large repositories, automatic hierarchical chunking ensures each output file stays under 256KB:
101
101
 
102
102
  ```bash
103
- # Auto-chunking when >100 files detected
103
+ # Auto-chunking when estimated output >256KB
104
104
  code2llm ./ -f toon,evolution,code2logic --verbose
105
105
 
106
106
  # Force chunking with custom size limit
107
107
  code2llm ./ -f toon --chunk --chunk-size 256
108
108
 
109
- # Analyze only specific subproject
109
+ # Analyze only specific subproject (matches level-1 or level-2 names)
110
110
  code2llm ./ -f toon --only-subproject src
111
+ code2llm ./ -f toon --only-subproject src.core
111
112
 
112
- # Skip tests and examples
113
- code2llm ./ -f toon --skip-subprojects tests examples
113
+ # Skip specific directories
114
+ code2llm ./ -f toon --skip-subprojects tests examples docs
114
115
 
115
- # Customize file limit per chunk
116
- code2llm ./ -f toon --chunk --max-files-per-chunk 50
116
+ # Customize chunking parameters
117
+ code2llm ./ -f toon --chunk --max-files-per-chunk 50 --chunk-size 512
117
118
  ```
118
119
 
119
- **Chunking Benefits:**
120
- - Each subproject analyzed separately (examples/, tests/, src/, etc.)
121
- - Output limited to ~256KB per file (configurable)
122
- - Parallel processing of chunks possible
123
- - Reduced memory usage for large repos
120
+ **Hierarchical Splitting Strategy:**
121
+ 1. **Level 0**: Entire project (if small enough, <256KB)
122
+ 2. **Level 1**: Top-level directories (src/, tests/, examples/)
123
+ 3. **Level 2**: Subdirectories if parent >256KB (src.core/, src.utils/)
124
+ 4. **Level 3**: File chunks if still too large
124
125
 
125
- **Output Structure:**
126
+ **Example Output Structure:**
126
127
  ```
127
128
  ./project/
128
- ├── src/ # Core code analysis
129
- │ ├── analysis.toon
129
+ ├── src/ # Level 1: src/ fits in 256KB
130
+ │ ├── analysis.toon # (~200KB)
130
131
  │ └── evolution.toon
131
- ├── tests/ # Test code analysis
132
+ ├── src_core/ # Level 2: src/core/ was too big
133
+ │ ├── analysis.toon # (~180KB)
134
+ │ └── evolution.toon
135
+ ├── src_utils_part1/ # Level 3: split by file count
136
+ │ └── analysis.toon # (~150KB)
137
+ ├── tests/ # Level 1: tests/
132
138
  │ └── analysis.toon
133
- ├── examples/ # Examples analysis
139
+ ├── examples/ # Level 1: examples/
134
140
  │ └── analysis.toon
135
- ├── analysis.toon # Merged summary
136
- └── evolution.toon # Full refactoring queue
141
+ ├── analysis.toon # Merged summary (all levels)
142
+ └── evolution.toon # Full refactoring queue
137
143
  ```
138
144
 
145
+ **Size Estimation:**
146
+ - ~3KB per Python file in TOON format
147
+ - Auto-detect chunking when: `file_count × 3KB > 256KB`
148
+ - Example: 100 files ≈ 300KB → triggers chunking
149
+
150
+ **Benefits:**
151
+ - Each output file <256KB (easy for LLMs to process)
152
+ - Natural code boundaries (module/submodule level)
153
+ - Incremental analysis possible
154
+ - Parallel processing ready
155
+
139
156
  ### Refactoring Focus
140
157
  ```bash
141
158
  # Get refactoring recommendations
@@ -46,46 +46,63 @@ code2llm ./ -f all --max-memory 500
46
46
  code2llm ./ -f all --no-png
47
47
  ```
48
48
 
49
- ### Large Repository Analysis (Chunking)
50
- For repositories >100 files, automatic chunking splits analysis into smaller subprojects:
49
+ ### Large Repository Analysis (Hierarchical Chunking)
50
+ For large repositories, automatic hierarchical chunking ensures each output file stays under 256KB:
51
51
 
52
52
  ```bash
53
- # Auto-chunking when >100 files detected
53
+ # Auto-chunking when estimated output >256KB
54
54
  code2llm ./ -f toon,evolution,code2logic --verbose
55
55
 
56
56
  # Force chunking with custom size limit
57
57
  code2llm ./ -f toon --chunk --chunk-size 256
58
58
 
59
- # Analyze only specific subproject
59
+ # Analyze only specific subproject (matches level-1 or level-2 names)
60
60
  code2llm ./ -f toon --only-subproject src
61
+ code2llm ./ -f toon --only-subproject src.core
61
62
 
62
- # Skip tests and examples
63
- code2llm ./ -f toon --skip-subprojects tests examples
63
+ # Skip specific directories
64
+ code2llm ./ -f toon --skip-subprojects tests examples docs
64
65
 
65
- # Customize file limit per chunk
66
- code2llm ./ -f toon --chunk --max-files-per-chunk 50
66
+ # Customize chunking parameters
67
+ code2llm ./ -f toon --chunk --max-files-per-chunk 50 --chunk-size 512
67
68
  ```
68
69
 
69
- **Chunking Benefits:**
70
- - Each subproject analyzed separately (examples/, tests/, src/, etc.)
71
- - Output limited to ~256KB per file (configurable)
72
- - Parallel processing of chunks possible
73
- - Reduced memory usage for large repos
70
+ **Hierarchical Splitting Strategy:**
71
+ 1. **Level 0**: Entire project (if small enough, <256KB)
72
+ 2. **Level 1**: Top-level directories (src/, tests/, examples/)
73
+ 3. **Level 2**: Subdirectories if parent >256KB (src.core/, src.utils/)
74
+ 4. **Level 3**: File chunks if still too large
74
75
 
75
- **Output Structure:**
76
+ **Example Output Structure:**
76
77
  ```
77
78
  ./project/
78
- ├── src/ # Core code analysis
79
- │ ├── analysis.toon
79
+ ├── src/ # Level 1: src/ fits in 256KB
80
+ │ ├── analysis.toon # (~200KB)
80
81
  │ └── evolution.toon
81
- ├── tests/ # Test code analysis
82
+ ├── src_core/ # Level 2: src/core/ was too big
83
+ │ ├── analysis.toon # (~180KB)
84
+ │ └── evolution.toon
85
+ ├── src_utils_part1/ # Level 3: split by file count
86
+ │ └── analysis.toon # (~150KB)
87
+ ├── tests/ # Level 1: tests/
82
88
  │ └── analysis.toon
83
- ├── examples/ # Examples analysis
89
+ ├── examples/ # Level 1: examples/
84
90
  │ └── analysis.toon
85
- ├── analysis.toon # Merged summary
86
- └── evolution.toon # Full refactoring queue
91
+ ├── analysis.toon # Merged summary (all levels)
92
+ └── evolution.toon # Full refactoring queue
87
93
  ```
88
94
 
95
+ **Size Estimation:**
96
+ - ~3KB per Python file in TOON format
97
+ - Auto-detect chunking when: `file_count × 3KB > 256KB`
98
+ - Example: 100 files ≈ 300KB → triggers chunking
99
+
100
+ **Benefits:**
101
+ - Each output file <256KB (easy for LLMs to process)
102
+ - Natural code boundaries (module/submodule level)
103
+ - Incremental analysis possible
104
+ - Parallel processing ready
105
+
89
106
  ### Refactoring Focus
90
107
  ```bash
91
108
  # Get refactoring recommendations
@@ -8,7 +8,7 @@ Includes NLP Processing Pipeline for query normalization, intent matching,
8
8
  and entity resolution with multilingual support.
9
9
  """
10
10
 
11
- __version__ = "0.5.18"
11
+ __version__ = "0.5.19"
12
12
  __author__ = "STTS Project"
13
13
 
14
14
  # Core analysis components
@@ -303,7 +303,9 @@ def _run_analysis(args, source_path: Path, output_dir: Path):
303
303
  Returns AnalysisResult or exits on error.
304
304
  For large repos, may analyze in chunks and merge results.
305
305
  """
306
- from .core.large_repo import LargeRepoSplitter, should_use_chunking
306
+ from .core.large_repo import (
307
+ HierarchicalRepoSplitter, should_use_chunking, get_analysis_plan
308
+ )
307
309
 
308
310
  # Check if we should use chunked analysis
309
311
  use_chunking = (
@@ -347,39 +349,57 @@ def _run_analysis(args, source_path: Path, output_dir: Path):
347
349
 
348
350
 
349
351
  def _run_chunked_analysis(args, source_path: Path, output_dir: Path):
350
- """Analyze large repository in chunks by subproject."""
351
- from .core.large_repo import LargeRepoSplitter, SubProject
352
+ """Analyze large repository using hierarchical chunking.
353
+
354
+ Strategy:
355
+ 1. Level 1 folders first
356
+ 2. If >256KB, split to level 2 subfolders
357
+ 3. If still too big, use file chunking
358
+ """
359
+ from .core.large_repo import HierarchicalRepoSplitter
352
360
 
353
- splitter = LargeRepoSplitter(
361
+ splitter = HierarchicalRepoSplitter(
354
362
  size_limit_kb=args.chunk_size,
355
363
  max_files_per_chunk=args.max_files_per_chunk
356
364
  )
357
365
 
358
- # Get analysis plan
366
+ # Get hierarchical analysis plan
359
367
  subprojects = splitter.get_analysis_plan(source_path)
360
368
 
361
369
  if args.verbose:
362
- print(f"Repository split into {len(subprojects)} subprojects:")
370
+ print(f"Hierarchical analysis plan ({len(subprojects)} chunks):")
371
+ level_counts = {}
372
+ for sp in subprojects:
373
+ level_counts[sp.level] = level_counts.get(sp.level, 0) + 1
374
+
375
+ for level in sorted(level_counts.keys()):
376
+ level_name = {0: 'root', 1: 'level-1', 2: 'level-2', 3: 'file-chunks'}.get(level, f'level-{level}')
377
+ print(f" {level_name}: {level_counts[level]} chunks")
378
+
379
+ print("\nChunks:")
363
380
  for sp in subprojects:
364
- print(f" - {sp.name}: {sp.file_count} files (~{sp.estimated_size_kb}KB)")
381
+ level_indicator = " " * sp.level
382
+ size_info = f"~{sp.estimated_size_kb}KB"
383
+ print(f"{level_indicator}{sp.name}: {sp.file_count} files ({size_info})")
365
384
 
366
385
  # Filter subprojects if requested
367
386
  if args.only_subproject:
368
- subprojects = [sp for sp in subprojects if sp.name == args.only_subproject]
387
+ subprojects = [sp for sp in subprojects if sp.name == args.only_subproject or sp.name.startswith(args.only_subproject + '.')]
369
388
  if not subprojects:
370
389
  print(f"Error: Subproject '{args.only_subproject}' not found", file=sys.stderr)
371
390
  sys.exit(1)
372
391
 
373
392
  if args.skip_subprojects:
374
- subprojects = [sp for sp in subprojects if sp.name not in args.skip_subprojects]
393
+ subprojects = [sp for sp in subprojects if not any(sp.name.startswith(skip) for skip in args.skip_subprojects)]
375
394
 
376
395
  # Analyze each subproject
377
396
  all_results = []
378
397
  for i, subproject in enumerate(subprojects, 1):
379
398
  if args.verbose:
380
- print(f"\n[{i}/{len(subprojects)}] Analyzing: {subproject.name}")
399
+ level_name = {0: 'root', 1: 'L1', 2: 'L2', 3: 'chunk'}.get(subproject.level, f'L{subproject.level}')
400
+ print(f"\n[{i}/{len(subprojects)}] Analyzing [{level_name}]: {subproject.name}")
381
401
 
382
- sp_output_dir = output_dir / subproject.name
402
+ sp_output_dir = output_dir / subproject.name.replace('.', '_')
383
403
  sp_output_dir.mkdir(parents=True, exist_ok=True)
384
404
 
385
405
  result = _analyze_subproject(args, subproject, sp_output_dir)
@@ -391,7 +411,7 @@ def _run_chunked_analysis(args, source_path: Path, output_dir: Path):
391
411
 
392
412
  if args.verbose:
393
413
  print(f"\nChunked analysis complete:")
394
- print(f" - Subprojects analyzed: {len(all_results)}")
414
+ print(f" - Chunks analyzed: {len(all_results)}")
395
415
  print(f" - Total functions: {len(merged_result.functions)}")
396
416
  print(f" - Total classes: {len(merged_result.classes)}")
397
417
 
@@ -1,10 +1,11 @@
1
1
  """Export functions for CLI - extracted from cli.py to reduce module complexity."""
2
2
 
3
+ import os
3
4
  import shutil
4
5
  import subprocess
5
6
  import sys
6
7
  from pathlib import Path
7
- from typing import Optional
8
+ from typing import List, Optional
8
9
 
9
10
  from .exporters import (
10
11
  YAMLExporter, JSONExporter, MermaidExporter,
@@ -71,10 +72,16 @@ def _export_code2logic(args, source_path: Path, output_dir: Path, formats: list[
71
72
 
72
73
  found = _find_code2logic_output(output_dir, res)
73
74
  target = output_dir / 'project.toon'
74
- _normalize_code2logic_output(found, target)
75
+ final_files = _normalize_code2logic_output(found, target, args)
75
76
 
76
77
  if args.verbose:
77
- print(f" - CODE2LOGIC (project logic): {target}")
78
+ if len(final_files) == 1:
79
+ print(f" - CODE2LOGIC (project logic): {final_files[0]}")
80
+ else:
81
+ print(f" - CODE2LOGIC (project logic): {len(final_files)} parts")
82
+ for f in final_files:
83
+ size_kb = os.path.getsize(f) / 1024
84
+ print(f" → {f.name}: {size_kb:.1f}KB")
78
85
 
79
86
 
80
87
  def _should_run_code2logic(formats: list[str]) -> bool:
@@ -155,11 +162,22 @@ def _find_code2logic_output(output_dir: Path, res) -> Path:
155
162
  return found
156
163
 
157
164
 
158
- def _normalize_code2logic_output(found: Path, target: Path) -> None:
159
- """Normalize output location to target path."""
165
+ def _normalize_code2logic_output(found: Path, target: Path, args) -> List[Path]:
166
+ """Normalize output location to target path and check size limits."""
160
167
  if found != target:
161
168
  target.parent.mkdir(parents=True, exist_ok=True)
162
169
  shutil.copyfile(found, target)
170
+ found = target
171
+
172
+ # Check and split if exceeds 256KB limit
173
+ from .core.toon_size_manager import manage_toon_size
174
+ return manage_toon_size(
175
+ found,
176
+ target.parent,
177
+ max_kb=256,
178
+ prefix="project",
179
+ verbose=getattr(args, 'verbose', False)
180
+ )
163
181
 
164
182
 
165
183
  def _export_prompt_txt(args, output_dir: Path, formats: list[str], source_path: Optional[Path] = None) -> None:
@@ -383,29 +401,32 @@ def _export_single_project(args, result, output_dir: Path, formats: list, source
383
401
 
384
402
  def _export_chunked_results(args, result, output_dir: Path, source_path: Path, formats: list):
385
403
  """Export chunked analysis results to subproject directories."""
386
- from .core.large_repo import LargeRepoSplitter
404
+ from .core.large_repo import HierarchicalRepoSplitter
387
405
 
388
- splitter = LargeRepoSplitter()
389
- subprojects = splitter.detect_subprojects(source_path)
406
+ splitter = HierarchicalRepoSplitter(size_limit_kb=args.chunk_size)
407
+ subprojects = splitter.get_analysis_plan(source_path)
390
408
 
391
409
  # Filter subprojects same as in analysis
392
410
  if hasattr(args, 'only_subproject') and args.only_subproject:
393
- subprojects = [sp for sp in subprojects if sp.name == args.only_subproject]
411
+ subprojects = [sp for sp in subprojects if sp.name == args.only_subproject or sp.name.startswith(args.only_subproject + '.')]
394
412
 
395
413
  if hasattr(args, 'skip_subprojects') and args.skip_subprojects:
396
- subprojects = [sp for sp in subprojects if sp.name not in args.skip_subprojects]
414
+ subprojects = [sp for sp in subprojects if not any(sp.name.startswith(skip) for skip in args.skip_subprojects)]
397
415
 
398
416
  # Export each subproject to its own directory
399
417
  for sp in subprojects:
400
- sp_output_dir = output_dir / sp.name
418
+ sp_output_dir = output_dir / sp.name.replace('.', '_')
401
419
  if not sp_output_dir.exists():
402
- continue # Skip if subproject wasn't analyzed
420
+ continue
403
421
 
404
- # Check for subproject result file
405
- sp_result_file = sp_output_dir / 'analysis.yaml'
406
- if sp_result_file.exists():
407
- if args.verbose:
408
- print(f" - Exported {sp.name} to {sp_output_dir}")
422
+ # Check for subproject result files
423
+ for ext in ['.toon', '.yaml', '.json']:
424
+ result_file = sp_output_dir / f'analysis{ext}'
425
+ if result_file.exists():
426
+ if args.verbose:
427
+ level_name = {0: 'root', 1: 'L1', 2: 'L2'}.get(sp.level, f'L{sp.level}')
428
+ print(f" - Exported [{level_name}] {sp.name}")
429
+ break
409
430
 
410
431
  # Also create merged summary in root output dir
411
432
  _export_simple_formats(args, result, output_dir, ['toon', 'context'])