code2llm 0.5.52__tar.gz → 0.5.53__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. {code2llm-0.5.52 → code2llm-0.5.53}/PKG-INFO +1 -1
  2. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/__init__.py +1 -1
  3. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/cli_analysis.py +37 -6
  4. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/cli_exports/formats.py +17 -1
  5. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/cli_exports/orchestrator.py +7 -0
  6. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/cli_parser.py +13 -0
  7. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/analyzer.py +8 -2
  8. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/config.py +1 -0
  9. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/file_filter.py +15 -1
  10. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/lang/cpp.py +29 -3
  11. code2llm-0.5.53/code2llm/core/gitignore.py +125 -0
  12. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/__init__.py +2 -0
  13. code2llm-0.5.53/code2llm/exporters/index_generator.py +619 -0
  14. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/nlp/__init__.py +1 -1
  15. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm.egg-info/PKG-INFO +1 -1
  16. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm.egg-info/SOURCES.txt +2 -0
  17. {code2llm-0.5.52 → code2llm-0.5.53}/pyproject.toml +1 -1
  18. {code2llm-0.5.52 → code2llm-0.5.53}/LICENSE +0 -0
  19. {code2llm-0.5.52 → code2llm-0.5.53}/README.md +0 -0
  20. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/__main__.py +0 -0
  21. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/analysis/__init__.py +0 -0
  22. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/analysis/call_graph.py +0 -0
  23. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/analysis/cfg.py +0 -0
  24. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/analysis/coupling.py +0 -0
  25. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/analysis/data_analysis.py +0 -0
  26. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/analysis/dfg.py +0 -0
  27. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/analysis/pipeline_detector.py +0 -0
  28. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/analysis/side_effects.py +0 -0
  29. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/analysis/smells.py +0 -0
  30. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/analysis/type_inference.py +0 -0
  31. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/api.py +0 -0
  32. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/cli.py +0 -0
  33. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/cli_commands.py +0 -0
  34. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/cli_exports/__init__.py +0 -0
  35. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/cli_exports/code2logic.py +0 -0
  36. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/cli_exports/prompt.py +0 -0
  37. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/__init__.py +0 -0
  38. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/__init__.py +0 -0
  39. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/file_analyzer.py +0 -0
  40. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/file_cache.py +0 -0
  41. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/lang/__init__.py +0 -0
  42. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/lang/base.py +0 -0
  43. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/lang/csharp.py +0 -0
  44. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/lang/generic.py +0 -0
  45. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/lang/go_lang.py +0 -0
  46. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/lang/java.py +0 -0
  47. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/lang/php.py +0 -0
  48. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/lang/ruby.py +0 -0
  49. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/lang/rust.py +0 -0
  50. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/lang/typescript.py +0 -0
  51. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/core/refactoring.py +0 -0
  52. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/large_repo.py +0 -0
  53. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/models.py +0 -0
  54. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/repo_files.py +0 -0
  55. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/streaming/__init__.py +0 -0
  56. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/streaming/cache.py +0 -0
  57. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/streaming/incremental.py +0 -0
  58. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/streaming/prioritizer.py +0 -0
  59. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/streaming/scanner.py +0 -0
  60. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/streaming/strategies.py +0 -0
  61. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/streaming_analyzer.py +0 -0
  62. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/core/toon_size_manager.py +0 -0
  63. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/article_view.py +0 -0
  64. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/base.py +0 -0
  65. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/context_exporter.py +0 -0
  66. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/context_view.py +0 -0
  67. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/evolution_exporter.py +0 -0
  68. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/flow_constants.py +0 -0
  69. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/flow_exporter.py +0 -0
  70. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/flow_renderer.py +0 -0
  71. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/html_dashboard.py +0 -0
  72. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/json_exporter.py +0 -0
  73. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/llm_exporter.py +0 -0
  74. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/map_exporter.py +0 -0
  75. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/mermaid_exporter.py +0 -0
  76. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/project_yaml_exporter.py +0 -0
  77. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/readme_exporter.py +0 -0
  78. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/report_generators.py +0 -0
  79. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/toon/__init__.py +0 -0
  80. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/toon/helpers.py +0 -0
  81. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/toon/metrics.py +0 -0
  82. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/toon/module_detail.py +0 -0
  83. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/toon/renderer.py +0 -0
  84. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/toon.py +0 -0
  85. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/toon_view.py +0 -0
  86. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/validate_project.py +0 -0
  87. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/exporters/yaml_exporter.py +0 -0
  88. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/generators/__init__.py +0 -0
  89. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/generators/llm_flow.py +0 -0
  90. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/generators/llm_task.py +0 -0
  91. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/generators/mermaid.py +0 -0
  92. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/nlp/config.py +0 -0
  93. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/nlp/entity_resolution.py +0 -0
  94. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/nlp/intent_matching.py +0 -0
  95. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/nlp/normalization.py +0 -0
  96. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/nlp/pipeline.py +0 -0
  97. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/patterns/__init__.py +0 -0
  98. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/patterns/detector.py +0 -0
  99. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/refactor/__init__.py +0 -0
  100. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm/refactor/prompt_engine.py +0 -0
  101. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm.egg-info/dependency_links.txt +0 -0
  102. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm.egg-info/entry_points.txt +0 -0
  103. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm.egg-info/requires.txt +0 -0
  104. {code2llm-0.5.52 → code2llm-0.5.53}/code2llm.egg-info/top_level.txt +0 -0
  105. {code2llm-0.5.52 → code2llm-0.5.53}/setup.cfg +0 -0
  106. {code2llm-0.5.52 → code2llm-0.5.53}/setup.py +0 -0
  107. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_advanced_analysis.py +0 -0
  108. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_analyzer.py +0 -0
  109. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_deep_analysis.py +0 -0
  110. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_edge_cases.py +0 -0
  111. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_flow_exporter.py +0 -0
  112. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_format_quality.py +0 -0
  113. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_multilanguage_e2e.py +0 -0
  114. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_nlp_pipeline.py +0 -0
  115. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_nonpython_cc_calls.py +0 -0
  116. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_pipeline_detector.py +0 -0
  117. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_prompt_engine.py +0 -0
  118. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_prompt_txt.py +0 -0
  119. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_refactoring_engine.py +0 -0
  120. {code2llm-0.5.52 → code2llm-0.5.53}/tests/test_toon_v2.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: code2llm
3
- Version: 0.5.52
3
+ Version: 0.5.53
4
4
  Summary: High-performance Python code flow analysis with optimized TOON format - CFG, DFG, call graphs, and intelligent code queries
5
5
  Home-page: https://github.com/wronai/stts
6
6
  Author: STTS Project
@@ -8,7 +8,7 @@ Includes NLP Processing Pipeline for query normalization, intent matching,
8
8
  and entity resolution with multilingual support.
9
9
  """
10
10
 
11
- __version__ = "0.5.52"
11
+ __version__ = "0.5.53"
12
12
  __author__ = "STTS Project"
13
13
 
14
14
  # Core analysis components (lightweight, always needed)
@@ -42,7 +42,7 @@ def _run_standard_analysis(args, source_path: Path, output_dir: Path):
42
42
  if args.streaming or args.strategy in ['quick', 'deep']:
43
43
  result = _run_streaming_analysis(args, config, source_path)
44
44
  else:
45
- analyzer = ProjectAnalyzer(config)
45
+ analyzer = ProjectAnalyzer(config, source_path)
46
46
  result = analyzer.analyze_project(str(source_path))
47
47
 
48
48
  if args.verbose:
@@ -57,13 +57,29 @@ def _run_standard_analysis(args, source_path: Path, output_dir: Path):
57
57
 
58
58
  def _build_config(args, output_dir: Path):
59
59
  """Build analysis Config from CLI args."""
60
- from .core.config import Config
60
+ from .core.config import Config, FilterConfig
61
+
62
+ # Start with default filter config
63
+ filter_config = FilterConfig()
64
+
65
+ # Apply custom exclude patterns if provided
66
+ if hasattr(args, 'exclude') and args.exclude:
67
+ default_patterns = filter_config.exclude_patterns
68
+ custom_patterns = [f"*{pattern}*" if not pattern.startswith('*') and not pattern.endswith('*') else pattern
69
+ for pattern in args.exclude]
70
+ filter_config.exclude_patterns = list(set(default_patterns + custom_patterns))
71
+
72
+ # Apply gitignore setting
73
+ if hasattr(args, 'no_gitignore') and args.no_gitignore:
74
+ filter_config.gitignore_enabled = False
75
+
61
76
  return Config(
62
77
  mode=args.mode,
63
78
  max_depth_enumeration=args.max_depth,
64
79
  detect_state_machines=not args.no_patterns,
65
80
  detect_recursion=not args.no_patterns,
66
- output_dir=str(output_dir)
81
+ output_dir=str(output_dir),
82
+ filters=filter_config
67
83
  )
68
84
 
69
85
 
@@ -173,19 +189,34 @@ def _analyze_all_subprojects(args, subprojects, output_dir: Path) -> list:
173
189
  def _analyze_subproject(args, subproject, output_dir: Path):
174
190
  """Analyze and export a single subproject."""
175
191
  from .core.analyzer import ProjectAnalyzer
176
- from .core.config import Config
192
+ from .core.config import Config, FilterConfig
177
193
  from .cli_exports import _export_simple_formats, _export_evolution
178
194
 
195
+ # Start with default filter config
196
+ filter_config = FilterConfig()
197
+
198
+ # Apply custom exclude patterns if provided
199
+ if hasattr(args, 'exclude') and args.exclude:
200
+ default_patterns = filter_config.exclude_patterns
201
+ custom_patterns = [f"*{pattern}*" if not pattern.startswith('*') and not pattern.endswith('*') else pattern
202
+ for pattern in args.exclude]
203
+ filter_config.exclude_patterns = list(set(default_patterns + custom_patterns))
204
+
205
+ # Apply gitignore setting
206
+ if hasattr(args, 'no_gitignore') and args.no_gitignore:
207
+ filter_config.gitignore_enabled = False
208
+
179
209
  config = Config(
180
210
  mode=args.mode,
181
211
  max_depth_enumeration=args.max_depth,
182
212
  detect_state_machines=not args.no_patterns,
183
213
  detect_recursion=not args.no_patterns,
184
214
  output_dir=str(output_dir),
185
- verbose=args.verbose
215
+ verbose=args.verbose,
216
+ filters=filter_config
186
217
  )
187
218
 
188
- analyzer = ProjectAnalyzer(config)
219
+ analyzer = ProjectAnalyzer(config, subproject.path)
189
220
 
190
221
  try:
191
222
  result = analyzer.analyze_project(str(subproject.path))
@@ -11,7 +11,7 @@ from ..exporters import (
11
11
  EvolutionExporter, READMEExporter, ProjectYAMLExporter,
12
12
  ToonViewGenerator, ContextViewGenerator,
13
13
  ArticleViewGenerator, HTMLDashboardGenerator,
14
- load_project_yaml,
14
+ load_project_yaml, IndexHTMLGenerator,
15
15
  )
16
16
 
17
17
 
@@ -224,3 +224,19 @@ def _export_refactor_prompts(args, result, output_dir: Path):
224
224
  else:
225
225
  if args.verbose:
226
226
  print(" - Refactoring: No code smells detected.")
227
+
228
+
229
+ def _export_index_html(args, output_dir: Path) -> None:
230
+ """Generate index.html for browsing all generated files."""
231
+ # Only generate index.html when 'all' formats is used
232
+ if 'all' not in args.format:
233
+ return
234
+
235
+ try:
236
+ generator = IndexHTMLGenerator(output_dir)
237
+ index_path = generator.generate()
238
+ if args.verbose:
239
+ print(f" - INDEX (file browser): {index_path}")
240
+ except Exception as e:
241
+ if args.verbose:
242
+ print(f" - INDEX generation failed: {e}", file=sys.stderr)
@@ -12,6 +12,7 @@ from .formats import (
12
12
  _export_context_fallback,
13
13
  _export_readme,
14
14
  _export_refactor_prompts,
15
+ _export_index_html,
15
16
  )
16
17
  from .code2logic import _export_code2logic
17
18
  from .prompt import _export_prompt_txt, _export_chunked_prompt_txt
@@ -57,6 +58,9 @@ def _export_single_project(args, result, output_dir: Path, formats: list, source
57
58
  _export_refactor_prompts(args, result, output_dir)
58
59
 
59
60
  _export_readme(args, result, output_dir)
61
+
62
+ # Generate index.html for browsing all files (only when 'all' formats used)
63
+ _export_index_html(args, output_dir)
60
64
 
61
65
 
62
66
  def _export_chunked_results(args, result, output_dir: Path, source_path: Path, formats: list):
@@ -94,3 +98,6 @@ def _export_chunked_results(args, result, output_dir: Path, source_path: Path, f
94
98
  _export_chunked_prompt_txt(args, output_dir, formats, source_path, subprojects)
95
99
 
96
100
  _export_readme(args, result, output_dir)
101
+
102
+ # Generate index.html for browsing all files (only when 'all' formats used)
103
+ _export_index_html(args, output_dir)
@@ -229,6 +229,19 @@ Strategy Options (--strategy):
229
229
  help='Analyze only specific subproject (e.g., --only-subproject src)'
230
230
  )
231
231
 
232
+ parser.add_argument(
233
+ '--exclude',
234
+ nargs='+',
235
+ default=[],
236
+ help='Exclude specific directories or patterns (e.g., --exclude vendor build test)'
237
+ )
238
+
239
+ parser.add_argument(
240
+ '--no-gitignore',
241
+ action='store_true',
242
+ help='Disable .gitignore support (include all files)'
243
+ )
244
+
232
245
  parser.add_argument(
233
246
  '--validate',
234
247
  action='store_true',
@@ -15,13 +15,14 @@ from .core import FileCache, FastFileFilter, FileAnalyzer, RefactoringAnalyzer,
15
15
  class ProjectAnalyzer:
16
16
  """Main analyzer with parallel processing."""
17
17
 
18
- def __init__(self, config: Optional[Config] = None):
18
+ def __init__(self, config: Optional[Config] = None, project_path: Optional[Path] = None):
19
19
  self.config = config or FAST_CONFIG
20
+ self.project_path = project_path
20
21
  self.cache = FileCache(
21
22
  self.config.performance.cache_dir,
22
23
  self.config.performance.cache_ttl_hours
23
24
  ) if self.config.performance.enable_cache else None
24
- self.file_filter = FastFileFilter(self.config.filters)
25
+ self.file_filter = FastFileFilter(self.config.filters, project_path)
25
26
  self.refactoring_analyzer = RefactoringAnalyzer(self.config, self.file_filter)
26
27
 
27
28
  def analyze_project(self, project_path: str) -> AnalysisResult:
@@ -32,6 +33,11 @@ class ProjectAnalyzer:
32
33
  if not project_path.exists():
33
34
  raise FileNotFoundError(f"Project path does not exist: {project_path}")
34
35
 
36
+ # Update project path for gitignore support if not set during init
37
+ if not self.project_path:
38
+ self.project_path = project_path
39
+ self.file_filter = FastFileFilter(self.config.filters, project_path)
40
+
35
41
  # Collect Python files
36
42
  files = self._collect_files(project_path)
37
43
 
@@ -46,6 +46,7 @@ class FilterConfig:
46
46
  skip_private: bool = False
47
47
  skip_properties: bool = True
48
48
  skip_accessors: bool = True
49
+ gitignore_enabled: bool = True
49
50
 
50
51
 
51
52
  @dataclass
@@ -1,21 +1,35 @@
1
1
  """Fast file filtering with pattern matching."""
2
2
 
3
3
  import fnmatch
4
+ from pathlib import Path
4
5
  from ..config import FilterConfig
6
+ from ..gitignore import load_gitignore_patterns
5
7
 
6
8
 
7
9
  class FastFileFilter:
8
10
  """Fast file filtering with pattern matching."""
9
11
 
10
- def __init__(self, config: FilterConfig):
12
+ def __init__(self, config: FilterConfig, project_path: Path = None):
11
13
  self.config = config
14
+ self.project_path = project_path
12
15
  self._exclude_patterns = [p.lower() for p in config.exclude_patterns]
13
16
  self._include_patterns = [p.lower() for p in config.include_patterns]
17
+
18
+ # Load gitignore patterns if enabled and project path is provided
19
+ self._gitignore_parser = None
20
+ if config.gitignore_enabled and project_path:
21
+ self._gitignore_parser = load_gitignore_patterns(project_path)
14
22
 
15
23
  def should_process(self, file_path: str) -> bool:
16
24
  """Check if file should be processed."""
17
25
  path_lower = file_path.lower()
18
26
 
27
+ # Check gitignore patterns first
28
+ if self._gitignore_parser and self.project_path:
29
+ file_path_obj = Path(file_path)
30
+ if self._gitignore_parser.is_ignored(file_path_obj, self.project_path):
31
+ return False
32
+
19
33
  # Check exclude patterns
20
34
  for pattern in self._exclude_patterns:
21
35
  if fnmatch.fnmatch(path_lower, pattern) or pattern in path_lower:
@@ -34,11 +34,35 @@ def analyze_cpp(content: str, file_path: str, module_name: str,
34
34
  current_namespace = None
35
35
  brace_depth = 0
36
36
  class_brace_depth = 0
37
+ in_block_comment = False
38
+ in_line_comment = False
37
39
 
38
40
  for line_no, line in enumerate(lines, 1):
39
41
  raw_line = line
40
42
  line = line.strip()
41
- if not line or line.startswith('//'):
43
+
44
+ # Handle block comments (/* ... */) and line comments
45
+ if not in_block_comment:
46
+ if '/*' in raw_line:
47
+ in_block_comment = True
48
+ # Remove everything after /* start
49
+ raw_line = raw_line.split('/*')[0]
50
+ line = raw_line.strip()
51
+ elif line.startswith('//'):
52
+ # Single line comment, skip entirely
53
+ continue
54
+ else:
55
+ if '*/' in raw_line:
56
+ # End of block comment
57
+ in_block_comment = False
58
+ # Remove everything before */ end
59
+ raw_line = raw_line.split('*/')[1]
60
+ line = raw_line.strip()
61
+ else:
62
+ # Still in block comment, skip this line
63
+ continue
64
+
65
+ if not line:
42
66
  continue
43
67
 
44
68
  # Track brace depth for class scope
@@ -89,9 +113,11 @@ def analyze_cpp(content: str, file_path: str, module_name: str,
89
113
  func_match = func_pattern.match(line)
90
114
  if func_match:
91
115
  func_name = func_match.group(1)
92
- # Skip keywords that look like functions
116
+ # Skip keywords that look like functions, plus common license terms
93
117
  if func_name in ('if', 'for', 'while', 'switch', 'catch', 'return',
94
- 'sizeof', 'decltype', 'typeof', 'new', 'delete'):
118
+ 'sizeof', 'decltype', 'typeof', 'new', 'delete',
119
+ 'Copyright', 'License', 'TORT', 'WITHOUT', 'WARRANTY',
120
+ 'Permission', 'Redistribution', 'Conditions', 'Disclaimer'):
95
121
  continue
96
122
 
97
123
  if current_class:
@@ -0,0 +1,125 @@
1
+ """Gitignore support for code2llm file filtering."""
2
+
3
+ from pathlib import Path
4
+ from typing import List, Set
5
+ import re
6
+
7
+
8
+ class GitIgnoreParser:
9
+ """Parse and apply .gitignore patterns to file paths."""
10
+
11
+ def __init__(self, gitignore_path: Path = None):
12
+ """Initialize parser with optional .gitignore file path."""
13
+ self.patterns: List[re.Pattern] = []
14
+ self.dir_patterns: List[re.Pattern] = []
15
+ self.negated_patterns: List[re.Pattern] = []
16
+
17
+ if gitignore_path and gitignore_path.exists():
18
+ self._load_gitignore(gitignore_path)
19
+
20
+ def _load_gitignore(self, gitignore_path: Path) -> None:
21
+ """Load and parse .gitignore file."""
22
+ try:
23
+ with open(gitignore_path, 'r', encoding='utf-8') as f:
24
+ for line_num, line in enumerate(f, 1):
25
+ line = line.rstrip()
26
+
27
+ # Skip empty lines and comments
28
+ if not line or line.startswith('#'):
29
+ continue
30
+
31
+ # Parse pattern
32
+ pattern = self._parse_pattern(line)
33
+ if pattern:
34
+ self.patterns.append(pattern)
35
+ except (OSError, UnicodeDecodeError):
36
+ # Silently ignore gitignore parsing errors
37
+ pass
38
+
39
+ def _parse_pattern(self, pattern: str) -> re.Pattern:
40
+ """Parse a single gitignore pattern into regex."""
41
+ is_negated = pattern.startswith('!')
42
+ if is_negated:
43
+ pattern = pattern[1:]
44
+
45
+ is_dir_only = pattern.endswith('/')
46
+ if is_dir_only:
47
+ pattern = pattern[:-1]
48
+
49
+ # Handle absolute patterns (starting with /)
50
+ if pattern.startswith('/'):
51
+ pattern = pattern[1:]
52
+ # Match from beginning of path
53
+ regex_pattern = f'^{self._wildcard_to_regex(pattern)}'
54
+ else:
55
+ # Match anywhere in path
56
+ regex_pattern = self._wildcard_to_regex(pattern)
57
+
58
+ # Match directory or file
59
+ if is_dir_only:
60
+ regex_pattern += f'(/.*)?$'
61
+ else:
62
+ regex_pattern += f'(/.*)?$'
63
+
64
+ try:
65
+ compiled = re.compile(regex_pattern)
66
+ if is_negated:
67
+ self.negated_patterns.append(compiled)
68
+ elif is_dir_only:
69
+ self.dir_patterns.append(compiled)
70
+ # Also add to main patterns for directory matching
71
+ return compiled
72
+ else:
73
+ return compiled
74
+ except re.error:
75
+ # Skip invalid regex patterns
76
+ pass
77
+
78
+ return None
79
+
80
+ def _wildcard_to_regex(self, pattern: str) -> str:
81
+ """Convert gitignore wildcards to regex."""
82
+ # Escape regex special characters except *, ?, []
83
+ escaped = re.escape(pattern)
84
+
85
+ # Unescape gitignore wildcards
86
+ escaped = escaped.replace(r'\*', '.*') # * matches any sequence
87
+ escaped = escaped.replace(r'\?', '.') # ? matches any single character
88
+
89
+ # Handle character classes [abc]
90
+ escaped = re.sub(r'\\(\[.*?\])', r'\1', escaped)
91
+
92
+ return escaped
93
+
94
+ def is_ignored(self, file_path: Path, project_root: Path) -> bool:
95
+ """Check if file should be ignored based on gitignore patterns."""
96
+ # Convert to relative path from project root
97
+ try:
98
+ rel_path = file_path.relative_to(project_root)
99
+ path_str = str(rel_path).replace('\\', '/') # Use forward slashes
100
+ except ValueError:
101
+ # File is outside project root, don't ignore
102
+ return False
103
+
104
+ # Check negated patterns first (they override)
105
+ for pattern in self.negated_patterns:
106
+ if pattern.search(path_str):
107
+ return False
108
+
109
+ # Check regular patterns
110
+ for pattern in self.patterns:
111
+ if pattern and pattern.search(path_str):
112
+ return True
113
+
114
+ # Check directory patterns
115
+ for pattern in self.dir_patterns:
116
+ if pattern and pattern.search(path_str):
117
+ return True
118
+
119
+ return False
120
+
121
+
122
+ def load_gitignore_patterns(project_path: Path) -> GitIgnoreParser:
123
+ """Load gitignore patterns from project directory."""
124
+ gitignore_path = project_path / '.gitignore'
125
+ return GitIgnoreParser(gitignore_path)
@@ -31,6 +31,7 @@ from .report_generators import (
31
31
  ArticleViewGenerator, HTMLDashboardGenerator,
32
32
  load_project_yaml,
33
33
  )
34
+ from .index_generator import IndexHTMLGenerator
34
35
 
35
36
  __all__ = [
36
37
  'Exporter',
@@ -49,5 +50,6 @@ __all__ = [
49
50
  'ContextViewGenerator',
50
51
  'ArticleViewGenerator',
51
52
  'HTMLDashboardGenerator',
53
+ 'IndexHTMLGenerator',
52
54
  'load_project_yaml',
53
55
  ]