cobol-intel 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. cobol_intel-0.3.0/.dockerignore +13 -0
  2. cobol_intel-0.3.0/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
  3. cobol_intel-0.3.0/.github/ISSUE_TEMPLATE/feature_request.md +22 -0
  4. cobol_intel-0.3.0/.github/workflows/ci.yml +29 -0
  5. cobol_intel-0.3.0/.github/workflows/release.yml +47 -0
  6. cobol_intel-0.3.0/.gitignore +41 -0
  7. cobol_intel-0.3.0/CHANGELOG.md +94 -0
  8. cobol_intel-0.3.0/CONTRIBUTING.md +76 -0
  9. cobol_intel-0.3.0/Dockerfile +35 -0
  10. cobol_intel-0.3.0/Makefile +26 -0
  11. cobol_intel-0.3.0/PKG-INFO +259 -0
  12. cobol_intel-0.3.0/README.md +206 -0
  13. cobol_intel-0.3.0/config/llm_policy.json +36 -0
  14. cobol_intel-0.3.0/copybooks/ACCTTMPL.cpy +3 -0
  15. cobol_intel-0.3.0/copybooks/CUSTMAST.cpy +10 -0
  16. cobol_intel-0.3.0/copybooks/CYCLEA.cpy +3 -0
  17. cobol_intel-0.3.0/copybooks/CYCLEB.cpy +3 -0
  18. cobol_intel-0.3.0/docker-compose.yml +20 -0
  19. cobol_intel-0.3.0/docs/API_GUIDE.md +134 -0
  20. cobol_intel-0.3.0/docs/ARCHITECTURE.md +515 -0
  21. cobol_intel-0.3.0/docs/ARTIFACT_EXAMPLE.md +153 -0
  22. cobol_intel-0.3.0/docs/DECISIONS.md +401 -0
  23. cobol_intel-0.3.0/docs/FINTECH_READINESS.md +109 -0
  24. cobol_intel-0.3.0/docs/OUTPUT_GALLERY.md +79 -0
  25. cobol_intel-0.3.0/docs/PARSER_EVALUATION.md +105 -0
  26. cobol_intel-0.3.0/docs/PLAN.md +485 -0
  27. cobol_intel-0.3.0/docs/PROGRESS.md +336 -0
  28. cobol_intel-0.3.0/docs/RESEARCH.md +202 -0
  29. cobol_intel-0.3.0/docs/SUITE_VISION.md +139 -0
  30. cobol_intel-0.3.0/pyproject.toml +100 -0
  31. cobol_intel-0.3.0/samples/README.md +46 -0
  32. cobol_intel-0.3.0/samples/complex/acctval.cbl +63 -0
  33. cobol_intel-0.3.0/samples/complex/filebatch.cbl +33 -0
  34. cobol_intel-0.3.0/samples/complex/fileio.cbl +22 -0
  35. cobol_intel-0.3.0/samples/complex/interest.cbl +67 -0
  36. cobol_intel-0.3.0/samples/complex/linkdemo.cbl +16 -0
  37. cobol_intel-0.3.0/samples/complex/payment.cbl +75 -0
  38. cobol_intel-0.3.0/samples/complex/sqlops.cbl +33 -0
  39. cobol_intel-0.3.0/samples/fixed_format/calc.cbl +40 -0
  40. cobol_intel-0.3.0/samples/fixed_format/hello.cbl +7 -0
  41. cobol_intel-0.3.0/samples/fixed_format/recon.cbl +15 -0
  42. cobol_intel-0.3.0/samples/free_format/simple.cbl +14 -0
  43. cobol_intel-0.3.0/samples/with_copybook/customer.cbl +22 -0
  44. cobol_intel-0.3.0/samples/with_copybook/replacing_customer.cbl +12 -0
  45. cobol_intel-0.3.0/src/cobol_intel/__init__.py +7 -0
  46. cobol_intel-0.3.0/src/cobol_intel/analysis/__init__.py +24 -0
  47. cobol_intel-0.3.0/src/cobol_intel/analysis/call_graph.py +79 -0
  48. cobol_intel-0.3.0/src/cobol_intel/analysis/cfg_builder.py +237 -0
  49. cobol_intel-0.3.0/src/cobol_intel/analysis/data_flow.py +252 -0
  50. cobol_intel-0.3.0/src/cobol_intel/analysis/dead_code.py +348 -0
  51. cobol_intel-0.3.0/src/cobol_intel/analysis/impact_analyzer.py +175 -0
  52. cobol_intel-0.3.0/src/cobol_intel/analysis/reference_indexer.py +274 -0
  53. cobol_intel-0.3.0/src/cobol_intel/analysis/rules_extractor.py +90 -0
  54. cobol_intel-0.3.0/src/cobol_intel/api/__init__.py +1 -0
  55. cobol_intel-0.3.0/src/cobol_intel/api/app.py +53 -0
  56. cobol_intel-0.3.0/src/cobol_intel/api/constants.py +6 -0
  57. cobol_intel-0.3.0/src/cobol_intel/api/errors.py +32 -0
  58. cobol_intel-0.3.0/src/cobol_intel/api/models.py +67 -0
  59. cobol_intel-0.3.0/src/cobol_intel/api/routers/__init__.py +0 -0
  60. cobol_intel-0.3.0/src/cobol_intel/api/routers/artifacts.py +78 -0
  61. cobol_intel-0.3.0/src/cobol_intel/api/routers/health.py +21 -0
  62. cobol_intel-0.3.0/src/cobol_intel/api/routers/runs.py +216 -0
  63. cobol_intel-0.3.0/src/cobol_intel/api/security.py +31 -0
  64. cobol_intel-0.3.0/src/cobol_intel/cli/__init__.py +7 -0
  65. cobol_intel-0.3.0/src/cobol_intel/cli/main.py +331 -0
  66. cobol_intel-0.3.0/src/cobol_intel/contracts/__init__.py +8 -0
  67. cobol_intel-0.3.0/src/cobol_intel/contracts/ast_output.py +64 -0
  68. cobol_intel-0.3.0/src/cobol_intel/contracts/cfg_output.py +42 -0
  69. cobol_intel-0.3.0/src/cobol_intel/contracts/data_flow_output.py +68 -0
  70. cobol_intel-0.3.0/src/cobol_intel/contracts/dead_code_output.py +47 -0
  71. cobol_intel-0.3.0/src/cobol_intel/contracts/explanation_output.py +53 -0
  72. cobol_intel-0.3.0/src/cobol_intel/contracts/governance.py +62 -0
  73. cobol_intel-0.3.0/src/cobol_intel/contracts/graph_output.py +42 -0
  74. cobol_intel-0.3.0/src/cobol_intel/contracts/impact_output.py +35 -0
  75. cobol_intel-0.3.0/src/cobol_intel/contracts/manifest.py +75 -0
  76. cobol_intel-0.3.0/src/cobol_intel/contracts/reference_output.py +43 -0
  77. cobol_intel-0.3.0/src/cobol_intel/contracts/rules_output.py +33 -0
  78. cobol_intel-0.3.0/src/cobol_intel/contracts/run_id.py +28 -0
  79. cobol_intel-0.3.0/src/cobol_intel/contracts/run_metrics.py +34 -0
  80. cobol_intel-0.3.0/src/cobol_intel/contracts/source_ref.py +18 -0
  81. cobol_intel-0.3.0/src/cobol_intel/core/__init__.py +8 -0
  82. cobol_intel-0.3.0/src/cobol_intel/llm/__init__.py +27 -0
  83. cobol_intel-0.3.0/src/cobol_intel/llm/backend.py +153 -0
  84. cobol_intel-0.3.0/src/cobol_intel/llm/claude_backend.py +127 -0
  85. cobol_intel-0.3.0/src/cobol_intel/llm/context_builder.py +308 -0
  86. cobol_intel-0.3.0/src/cobol_intel/llm/explainer.py +223 -0
  87. cobol_intel-0.3.0/src/cobol_intel/llm/local_backend.py +220 -0
  88. cobol_intel-0.3.0/src/cobol_intel/llm/ollama_backend.py +112 -0
  89. cobol_intel-0.3.0/src/cobol_intel/llm/openai_backend.py +124 -0
  90. cobol_intel-0.3.0/src/cobol_intel/llm/policy.py +229 -0
  91. cobol_intel-0.3.0/src/cobol_intel/outputs/__init__.py +38 -0
  92. cobol_intel-0.3.0/src/cobol_intel/outputs/doc_generator.py +215 -0
  93. cobol_intel-0.3.0/src/cobol_intel/outputs/html_report.py +290 -0
  94. cobol_intel-0.3.0/src/cobol_intel/outputs/writers.py +109 -0
  95. cobol_intel-0.3.0/src/cobol_intel/parsers/__init__.py +8 -0
  96. cobol_intel-0.3.0/src/cobol_intel/parsers/antlr_gen/COBOL.g4 +377 -0
  97. cobol_intel-0.3.0/src/cobol_intel/parsers/antlr_gen/COBOL.interp +309 -0
  98. cobol_intel-0.3.0/src/cobol_intel/parsers/antlr_gen/COBOL.tokens +219 -0
  99. cobol_intel-0.3.0/src/cobol_intel/parsers/antlr_gen/COBOLLexer.interp +356 -0
  100. cobol_intel-0.3.0/src/cobol_intel/parsers/antlr_gen/COBOLLexer.py +548 -0
  101. cobol_intel-0.3.0/src/cobol_intel/parsers/antlr_gen/COBOLLexer.tokens +219 -0
  102. cobol_intel-0.3.0/src/cobol_intel/parsers/antlr_gen/COBOLListener.py +660 -0
  103. cobol_intel-0.3.0/src/cobol_intel/parsers/antlr_gen/COBOLParser.py +6052 -0
  104. cobol_intel-0.3.0/src/cobol_intel/parsers/antlr_gen/COBOLVisitor.py +373 -0
  105. cobol_intel-0.3.0/src/cobol_intel/parsers/antlr_gen/__init__.py +1 -0
  106. cobol_intel-0.3.0/src/cobol_intel/parsers/antlr_parser.py +324 -0
  107. cobol_intel-0.3.0/src/cobol_intel/parsers/base.py +82 -0
  108. cobol_intel-0.3.0/src/cobol_intel/parsers/cobol.lark +288 -0
  109. cobol_intel-0.3.0/src/cobol_intel/parsers/lark_parser.py +369 -0
  110. cobol_intel-0.3.0/src/cobol_intel/parsers/preprocessor.py +306 -0
  111. cobol_intel-0.3.0/src/cobol_intel/py.typed +0 -0
  112. cobol_intel-0.3.0/src/cobol_intel/service/__init__.py +26 -0
  113. cobol_intel-0.3.0/src/cobol_intel/service/cache.py +127 -0
  114. cobol_intel-0.3.0/src/cobol_intel/service/doc_service.py +126 -0
  115. cobol_intel-0.3.0/src/cobol_intel/service/explain.py +740 -0
  116. cobol_intel-0.3.0/src/cobol_intel/service/governance.py +163 -0
  117. cobol_intel-0.3.0/src/cobol_intel/service/parallel.py +82 -0
  118. cobol_intel-0.3.0/src/cobol_intel/service/pipeline.py +315 -0
  119. cobol_intel-0.3.0/src/cobol_intel/service/run_metrics.py +68 -0
  120. cobol_intel-0.3.0/tach.toml +53 -0
  121. cobol_intel-0.3.0/tests/__init__.py +0 -0
  122. cobol_intel-0.3.0/tests/contract/__init__.py +0 -0
  123. cobol_intel-0.3.0/tests/contract/test_ast_output.py +77 -0
  124. cobol_intel-0.3.0/tests/contract/test_cfg_output.py +50 -0
  125. cobol_intel-0.3.0/tests/contract/test_data_flow_output.py +82 -0
  126. cobol_intel-0.3.0/tests/contract/test_dead_code_output.py +59 -0
  127. cobol_intel-0.3.0/tests/contract/test_error_codes.py +27 -0
  128. cobol_intel-0.3.0/tests/contract/test_explanation_output.py +80 -0
  129. cobol_intel-0.3.0/tests/contract/test_graph_output.py +31 -0
  130. cobol_intel-0.3.0/tests/contract/test_impact_output.py +32 -0
  131. cobol_intel-0.3.0/tests/contract/test_manifest.py +75 -0
  132. cobol_intel-0.3.0/tests/contract/test_reference_output.py +57 -0
  133. cobol_intel-0.3.0/tests/contract/test_rules_output.py +42 -0
  134. cobol_intel-0.3.0/tests/contract/test_run_id.py +36 -0
  135. cobol_intel-0.3.0/tests/contract/test_run_metrics.py +46 -0
  136. cobol_intel-0.3.0/tests/corpus/__init__.py +0 -0
  137. cobol_intel-0.3.0/tests/corpus/test_antlr4_poc.py +189 -0
  138. cobol_intel-0.3.0/tests/corpus/test_parser_poc.py +287 -0
  139. cobol_intel-0.3.0/tests/corpus/test_phase1_corpus_matrix.py +61 -0
  140. cobol_intel-0.3.0/tests/evaluation/__init__.py +0 -0
  141. cobol_intel-0.3.0/tests/evaluation/test_benchmark.py +32 -0
  142. cobol_intel-0.3.0/tests/evaluation/test_raw_vs_pipeline.py +123 -0
  143. cobol_intel-0.3.0/tests/fixtures/expected/complex_call_graph.json +48 -0
  144. cobol_intel-0.3.0/tests/fixtures/expected/fileio_ast.json +87 -0
  145. cobol_intel-0.3.0/tests/fixtures/expected/payment_rules.json +222 -0
  146. cobol_intel-0.3.0/tests/integration/__init__.py +0 -0
  147. cobol_intel-0.3.0/tests/integration/test_api_runs.py +176 -0
  148. cobol_intel-0.3.0/tests/integration/test_service_pipeline.py +60 -0
  149. cobol_intel-0.3.0/tests/regression/test_phase1_baselines.py +61 -0
  150. cobol_intel-0.3.0/tests/unit/__init__.py +0 -0
  151. cobol_intel-0.3.0/tests/unit/test_api_models.py +61 -0
  152. cobol_intel-0.3.0/tests/unit/test_api_security.py +59 -0
  153. cobol_intel-0.3.0/tests/unit/test_backend_resilience.py +78 -0
  154. cobol_intel-0.3.0/tests/unit/test_cache.py +107 -0
  155. cobol_intel-0.3.0/tests/unit/test_cache_key.py +167 -0
  156. cobol_intel-0.3.0/tests/unit/test_call_graph.py +41 -0
  157. cobol_intel-0.3.0/tests/unit/test_cfg_builder.py +160 -0
  158. cobol_intel-0.3.0/tests/unit/test_cli_main.py +86 -0
  159. cobol_intel-0.3.0/tests/unit/test_context_builder.py +162 -0
  160. cobol_intel-0.3.0/tests/unit/test_data_flow.py +221 -0
  161. cobol_intel-0.3.0/tests/unit/test_dead_code.py +187 -0
  162. cobol_intel-0.3.0/tests/unit/test_doc_generator.py +121 -0
  163. cobol_intel-0.3.0/tests/unit/test_explain_service.py +162 -0
  164. cobol_intel-0.3.0/tests/unit/test_explainer.py +141 -0
  165. cobol_intel-0.3.0/tests/unit/test_governance_service.py +76 -0
  166. cobol_intel-0.3.0/tests/unit/test_html_report.py +106 -0
  167. cobol_intel-0.3.0/tests/unit/test_impact_analyzer.py +128 -0
  168. cobol_intel-0.3.0/tests/unit/test_llm_policy.py +114 -0
  169. cobol_intel-0.3.0/tests/unit/test_local_backend.py +42 -0
  170. cobol_intel-0.3.0/tests/unit/test_openai_backend.py +76 -0
  171. cobol_intel-0.3.0/tests/unit/test_parallel.py +112 -0
  172. cobol_intel-0.3.0/tests/unit/test_parser_extensions.py +67 -0
  173. cobol_intel-0.3.0/tests/unit/test_preprocessor.py +112 -0
  174. cobol_intel-0.3.0/tests/unit/test_reference_indexer.py +227 -0
  175. cobol_intel-0.3.0/tests/unit/test_rules_extractor.py +49 -0
  176. cobol_intel-0.3.0/tools/antlr-4.13.2-complete.jar +0 -0
  177. cobol_intel-0.3.0/tools/benchmark.py +354 -0
  178. cobol_intel-0.3.0/tools/dataset_builder.py +395 -0
  179. cobol_intel-0.3.0/tools/finetune.py +274 -0
  180. cobol_intel-0.3.0/uv.lock +3235 -0
@@ -0,0 +1,13 @@
1
+ .venv
2
+ .git
3
+ __pycache__
4
+ *.pyc
5
+ artifacts/
6
+ tests/
7
+ docs/
8
+ .github/
9
+ .mypy_cache/
10
+ .ruff_cache/
11
+ benchmark_*.json
12
+ benchmark_*.md
13
+ tests_runtime_*
@@ -0,0 +1,30 @@
1
+ ---
2
+ name: Bug Report
3
+ about: Report a bug in cobol-intel
4
+ title: "[Bug] "
5
+ labels: bug
6
+ ---
7
+
8
+ ## Description
9
+
10
+ A clear description of the bug.
11
+
12
+ ## Steps to Reproduce
13
+
14
+ 1. Run `cobol-intel analyze ...`
15
+ 2. ...
16
+
17
+ ## Expected Behavior
18
+
19
+ What you expected to happen.
20
+
21
+ ## Actual Behavior
22
+
23
+ What actually happened. Include error messages or output.
24
+
25
+ ## Environment
26
+
27
+ - OS: [e.g. Windows 11, Ubuntu 24.04]
28
+ - Python version: [e.g. 3.11.8]
29
+ - cobol-intel version: [e.g. 0.1.0]
30
+ - LLM backend (if relevant): [e.g. claude, openai, ollama]
@@ -0,0 +1,22 @@
1
+ ---
2
+ name: Feature Request
3
+ about: Suggest a new feature for cobol-intel
4
+ title: "[Feature] "
5
+ labels: enhancement
6
+ ---
7
+
8
+ ## Problem
9
+
10
+ What problem does this feature solve?
11
+
12
+ ## Proposed Solution
13
+
14
+ How should this feature work?
15
+
16
+ ## Alternatives Considered
17
+
18
+ Any alternative approaches you've thought about.
19
+
20
+ ## Additional Context
21
+
22
+ Any other context, screenshots, or examples.
@@ -0,0 +1,29 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ strategy:
12
+ matrix:
13
+ os: [ubuntu-latest, windows-latest]
14
+ python-version: ["3.11", "3.12"]
15
+ runs-on: ${{ matrix.os }}
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ - uses: actions/setup-python@v5
19
+ with:
20
+ python-version: ${{ matrix.python-version }}
21
+ - name: Install dependencies
22
+ run: |
23
+ pip install -e ".[api,dev]"
24
+ - name: Lint
25
+ run: ruff check src/ tests/ tools/
26
+ - name: Check module boundaries
27
+ run: tach check
28
+ - name: Run tests with coverage
29
+ run: pytest --tb=short -q --cov=src/cobol_intel --cov-fail-under=85
@@ -0,0 +1,47 @@
1
+ name: Release to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ jobs:
12
+ build:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.12"
20
+
21
+ - name: Install build tools
22
+ run: pip install build
23
+
24
+ - name: Build package
25
+ run: python -m build
26
+
27
+ - name: Upload artifacts
28
+ uses: actions/upload-artifact@v4
29
+ with:
30
+ name: dist
31
+ path: dist/
32
+
33
+ publish:
34
+ needs: build
35
+ runs-on: ubuntu-latest
36
+ environment: pypi
37
+ permissions:
38
+ id-token: write
39
+ steps:
40
+ - name: Download artifacts
41
+ uses: actions/download-artifact@v4
42
+ with:
43
+ name: dist
44
+ path: dist/
45
+
46
+ - name: Publish to PyPI
47
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,41 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ .venv/
6
+ venv/
7
+ dist/
8
+ build/
9
+ *.egg-info/
10
+ .eggs/
11
+
12
+ # Testing
13
+ .pytest_cache/
14
+ .pytest_tmp/
15
+ pytest_tmp_workspace/
16
+ pytest-cache-files-*/
17
+ tests/.cache/
18
+ .coverage
19
+ htmlcov/
20
+ tmp*/
21
+
22
+ # Artifacts (generated output, not committed)
23
+ artifacts/
24
+ tests_runtime_artifacts/
25
+ tests_runtime_artifacts_cli/
26
+ .cobol_intel_cache/
27
+
28
+ # Environment
29
+ .env
30
+ .env.*
31
+ !.env.example
32
+
33
+ # IDE
34
+ .vscode/
35
+ .idea/
36
+ *.iml
37
+
38
+ # OS
39
+ .DS_Store
40
+ Thumbs.db
41
+ .claude
@@ -0,0 +1,94 @@
1
+ # Changelog
2
+
3
+ All notable changes to cobol-intel will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/).
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Added
11
+
12
+ - Fine-tuning dataset builder (`tools/dataset_builder.py`) — generates
13
+ Alpaca/ShareGPT instruction-tuning pairs from the analysis pipeline
14
+ - LoRA/PEFT fine-tuning script (`tools/finetune.py`) — CodeLlama-7B compatible,
15
+ QLoRA support, checkpoint resume, reproducible config saving
16
+ - Local fine-tuned model backend (`llm/local_backend.py`) — loads PEFT or
17
+ standard HuggingFace models for fully offline inference
18
+ - Prompt strategy comparison in benchmark (`tools/benchmark.py --compare`)
19
+ - `py.typed` marker for PEP 561 typed package support
20
+
21
+ ### Fixed
22
+
23
+ - pyproject.toml TOML ordering: `dependencies` was incorrectly nested under
24
+ `[project.urls]` instead of `[project]`, causing `uv build` to fail
25
+ - Local backend defaults are now deterministic and install guidance points to
26
+ package extras for offline inference and training
27
+ - Governance now treats the `local` backend as `local_only` for policy and
28
+ redaction decisions
29
+
30
+ ## [0.3.0] - 2026-04-01
31
+
32
+ ### Added
33
+
34
+ - **Control flow graph (CFG) builder**: intra-program CFG with basic blocks, branch/perform/
35
+ fallthrough edges, and unsupported construct warnings (GO TO, ALTER)
36
+ - **Field reference indexer**: per-statement read/write/condition/call_param classification
37
+ with aggregated field usage counts
38
+ - **Data flow analyzer**: directed field-to-field flow graph covering MOVE, COMPUTE,
39
+ READ INTO, WRITE FROM, REWRITE FROM, and CALL USING — with Mermaid diagram output
40
+ - **Dead code detector**: unreachable paragraph detection via BFS reachability, unused
41
+ data item scanning, and trivially dead branch detection (constant conditions)
42
+ - Pipeline now writes `analysis/` artifacts (CFG, data flow, dead code, references)
43
+ for every parsed program
44
+ - Doc generator includes data flow diagrams and dead code findings sections
45
+ - `ArtifactIndex` in manifest now tracks `analysis` artifacts
46
+
47
+ ### Changed
48
+
49
+ - Hardened API ergonomics with a module-level FastAPI app export, version response parity,
50
+ structured error payloads, and richer run summaries
51
+ - Made explanation cache keys safer against stale outputs by including a context revision
52
+ - Synced progress and fintech-readiness docs with the actual Phase 3 feature set
53
+ - Version bumped to 0.3.0
54
+
55
+ ### Fixed
56
+
57
+ - Fixed `make serve-api` to point at a real FastAPI app object
58
+ - Made `make clean` portable by using Python stdlib instead of Unix-only shell commands
59
+
60
+ ## [0.2.0] - 2026-04-01
61
+
62
+ ### Added
63
+
64
+ - Read-only REST API with versioned endpoints (`/api/v1/`)
65
+ - Structured error codes (`ErrorCode` enum) for operational monitoring
66
+ - Cross-platform CI pipeline (Linux + Windows, Python 3.11 + 3.12)
67
+ - Benchmark suite for parse success rate, latency, and token savings
68
+ - Per-program documentation generator (Markdown + HTML)
69
+ - Self-contained HTML report with sidebar navigation, search, and Mermaid graphs
70
+ - Change impact analyzer with call graph traversal and field reference scanning
71
+ - Parallel LLM processing with bounded backend-specific concurrency
72
+ - File-based explanation cache with composite invalidation keys
73
+ - Docker image and docker-compose with optional Ollama sidecar
74
+ - CLI commands: `impact`, `docs`
75
+ - CLI flags: `--parallel`, `--max-workers`, `--cache/--no-cache`, `--format`
76
+ - `Makefile` with common targets: lint, test, bench, build, serve-api
77
+ - PyPI publish workflow on tag push
78
+ - Output gallery and API guide documentation
79
+ - `CHANGELOG.md`, `CONTRIBUTING.md`, and GitHub issue templates
80
+
81
+ ## [0.1.0] - 2026-03-31
82
+
83
+ ### Added
84
+
85
+ - ANTLR4-based COBOL parser with fixed-format and free-format support
86
+ - COPYBOOK resolver with circular dependency detection
87
+ - Call graph builder and business rules extractor
88
+ - Multi-backend LLM explanation engine (Claude, OpenAI, Ollama)
89
+ - Context builder with smart chunking and token budget awareness
90
+ - Governance layer: audit logging, sensitivity classification, prompt redaction
91
+ - Strict policy enforcement and configurable model registry
92
+ - Backend retry/timeout and token budget controls
93
+ - CLI commands: `analyze`, `explain`, `graph`
94
+ - Versioned JSON artifact contracts with Pydantic v2
@@ -0,0 +1,76 @@
1
+ # Contributing to cobol-intel
2
+
3
+ ## Development Setup
4
+
5
+ ```bash
6
+ # Clone the repo
7
+ git clone https://github.com/YOUR_USERNAME/cobol-intel.git
9
+ cd cobol-intel
9
+
10
+ # Create virtual environment
11
+ python -m venv .venv
12
+ source .venv/bin/activate # Linux/Mac
13
+ # .venv\Scripts\activate # Windows
14
+
15
+ # Install with dev + api dependencies
16
+ pip install -e ".[api,dev]"
17
+ ```
18
+
19
+ ## Running Tests
20
+
21
+ ```bash
22
+ # Run all tests
23
+ pytest
24
+
25
+ # Run with coverage
26
+ pytest --cov=src/cobol_intel
27
+
28
+ # Run specific test categories
29
+ pytest tests/unit/
30
+ pytest tests/contract/
31
+ pytest tests/integration/
32
+ pytest tests/evaluation/
33
+ ```
34
+
35
+ ## Linting
36
+
37
+ ```bash
38
+ ruff check src/ tests/ tools/
39
+ ```
40
+
41
+ ## Module Boundaries
42
+
43
+ This project enforces strict module dependency boundaries using [tach](https://docs.gauge.sh/):
44
+
45
+ ```bash
46
+ tach check
47
+ ```
48
+
49
+ The dependency graph:
50
+
51
+ ```text
52
+ contracts (0 deps) <- core <- service <- cli
53
+ parsers <- analysis <- service <- api
54
+ llm <- service
55
+ outputs <- service
56
+ ```
57
+
58
+ Key rules:
59
+ - `contracts` and `core` must never import from `cli` or `api`
60
+ - `analysis` and `parsers` must not depend on LLM backends
61
+ - `cli` and `api` only call `service`, never access internals directly
62
+
63
+ ## Pull Request Process
64
+
65
+ 1. Fork the repo and create a feature branch
66
+ 2. Write tests for new functionality
67
+ 3. Ensure `pytest`, `ruff check`, and `tach check` all pass
68
+ 4. Keep commits focused and well-described
69
+ 5. Open a PR against `main` with a clear description
70
+
71
+ ## Code Style
72
+
73
+ - Python 3.11+
74
+ - Max line length: 100 characters
75
+ - Use type annotations for function signatures
76
+ - Follow existing patterns in the module you're modifying
@@ -0,0 +1,35 @@
1
+ # --- Build stage ---
2
+ FROM python:3.11-slim AS builder
3
+
4
+ WORKDIR /build
5
+
6
+ COPY pyproject.toml README.md ./
7
+ COPY src/ src/
8
+ COPY config/ config/
9
+
10
+ RUN pip install --no-cache-dir --prefix=/install ".[api]"
11
+
12
+ # --- Runtime stage ---
13
+ FROM python:3.11-slim
14
+
15
+ LABEL maintainer="WwzFwz" \
16
+ description="COBOL Intelligence Platform — static analysis + LLM for legacy COBOL"
17
+
18
+ RUN groupadd --gid 1000 cobol && \
19
+ useradd --uid 1000 --gid cobol --create-home cobol
20
+
21
+ COPY --from=builder /install /usr/local
22
+ COPY config/ /app/config/
23
+
24
+ WORKDIR /app
25
+ RUN mkdir -p /app/artifacts && chown -R cobol:cobol /app
26
+
27
+ USER cobol
28
+
29
+ EXPOSE 8000
30
+
31
+ HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
32
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/v1/health')" || exit 1
33
+
34
+ ENTRYPOINT ["cobol-intel"]
35
+ CMD ["--help"]
@@ -0,0 +1,26 @@
1
+ .PHONY: lint test test-cov bench build serve-api docker clean
2
+
3
+ lint:
4
+ ruff check src/ tests/
5
+ tach check
6
+
7
+ test:
8
+ pytest tests/ -x -q --tb=short
9
+
10
+ test-cov:
11
+ pytest tests/ --cov=src/cobol_intel --cov-report=term-missing
12
+
13
+ bench:
14
+ python tools/benchmark.py
15
+
16
+ build:
17
+ python -m build
18
+
19
+ serve-api:
20
+ python -m uvicorn cobol_intel.api.app:app --host 0.0.0.0 --port 8000 --reload
21
+
22
+ docker:
23
+ docker build -t cobol-intel .
24
+
25
+ clean:
26
+ python -c "from pathlib import Path; import shutil; [shutil.rmtree(p, ignore_errors=True) for p in [Path('dist'), Path('build'), Path('.cobol_intel_cache')]]; [shutil.rmtree(p, ignore_errors=True) for p in Path('.').rglob('__pycache__') if p.is_dir()]; [shutil.rmtree(p, ignore_errors=True) for p in Path('.').glob('*.egg-info') if p.is_dir()]"
@@ -0,0 +1,259 @@
1
+ Metadata-Version: 2.4
2
+ Name: cobol-intel
3
+ Version: 0.3.0
4
+ Summary: Open-source platform for understanding, documenting, and analyzing legacy COBOL codebases using static analysis and LLM
5
+ Project-URL: Homepage, https://github.com/WwzFwz/cobol-intel
6
+ Project-URL: Documentation, https://github.com/WwzFwz/cobol-intel/tree/main/docs
7
+ Project-URL: Repository, https://github.com/WwzFwz/cobol-intel
8
+ Project-URL: Issues, https://github.com/WwzFwz/cobol-intel/issues
9
+ Project-URL: Changelog, https://github.com/WwzFwz/cobol-intel/blob/main/CHANGELOG.md
10
+ License: MIT
11
+ Keywords: cobol,fintech,legacy,llm,modernization,static-analysis
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Financial and Insurance Industry
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Code Generators
19
+ Classifier: Topic :: Software Development :: Documentation
20
+ Classifier: Typing :: Typed
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: anthropic>=0.30.0
23
+ Requires-Dist: antlr4-python3-runtime>=4.13.0
24
+ Requires-Dist: lark>=1.2.0
25
+ Requires-Dist: networkx>=3.0
26
+ Requires-Dist: ollama>=0.2.0
27
+ Requires-Dist: openai>=1.0.0
28
+ Requires-Dist: pydantic>=2.0.0
29
+ Requires-Dist: rich>=13.0.0
30
+ Requires-Dist: typer>=0.12.0
31
+ Provides-Extra: api
32
+ Requires-Dist: fastapi>=0.115.0; extra == 'api'
33
+ Requires-Dist: uvicorn[standard]>=0.30.0; extra == 'api'
34
+ Provides-Extra: dev
35
+ Requires-Dist: httpx>=0.27.0; extra == 'dev'
36
+ Requires-Dist: jsonschema>=4.0.0; extra == 'dev'
37
+ Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
38
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
39
+ Requires-Dist: ruff>=0.4.0; extra == 'dev'
40
+ Requires-Dist: tach>=0.9.0; extra == 'dev'
41
+ Provides-Extra: local
42
+ Requires-Dist: peft>=0.11.0; extra == 'local'
43
+ Requires-Dist: torch>=2.2.0; extra == 'local'
44
+ Requires-Dist: transformers>=4.40.0; extra == 'local'
45
+ Provides-Extra: train
46
+ Requires-Dist: accelerate>=0.30.0; extra == 'train'
47
+ Requires-Dist: bitsandbytes>=0.43.0; (platform_system != 'Windows') and extra == 'train'
48
+ Requires-Dist: datasets>=2.19.0; extra == 'train'
49
+ Requires-Dist: peft>=0.11.0; extra == 'train'
50
+ Requires-Dist: torch>=2.2.0; extra == 'train'
51
+ Requires-Dist: transformers>=4.40.0; extra == 'train'
52
+ Description-Content-Type: text/markdown
53
+
54
+ # cobol-intel
55
+
56
+ [![CI](https://github.com/WwzFwz/cobol-intel/actions/workflows/ci.yml/badge.svg)](https://github.com/WwzFwz/cobol-intel/actions/workflows/ci.yml)
57
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/downloads/)
58
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
59
+
60
+ Open-source static analysis and LLM explanation platform for legacy COBOL
61
+ codebases. Built for banking, fintech, and regulated modernization workflows.
62
+
63
+ ## Why This Exists
64
+
65
+ Legacy COBOL systems fail the same way: key maintainers retire, documentation
66
+ goes stale, impact analysis is manual, and regulators still need clear
67
+ explanations. `cobol-intel` fixes that with a structured pipeline:
68
+
69
+ ```
70
+ COBOL source → parser & AST → call graph & business rules → LLM explanation
71
+ → impact analysis
72
+ → documentation
73
+ ```
74
+
75
+ The LLM consumes clean, traceable artifacts — not raw COBOL.
76
+
77
+ ## Quickstart
78
+
79
+ ```bash
80
+ pip install cobol-intel
81
+
82
+ # Optional extras
83
+ pip install "cobol-intel[api]" # REST API
84
+ pip install "cobol-intel[local]" # local HuggingFace inference
85
+ pip install "cobol-intel[train]" # fine-tuning scripts
86
+
87
+ # Analyze a COBOL directory
88
+ cobol-intel analyze samples/ --copybook-dir copybooks
89
+
90
+ # Explain with an LLM backend
91
+ cobol-intel explain samples/complex/payment.cbl --model claude --mode business
92
+
93
+ # Generate documentation
94
+ cobol-intel docs artifacts/samples/run_xxx --format html
95
+
96
+ # Analyze change impact
97
+ cobol-intel impact artifacts/samples/run_xxx --changed-program PAYMENT --changed-field WS-BALANCE
98
+ ```
99
+
100
+ Output:
101
+
102
+ ```
103
+ [cobol-intel] analyze: samples/
104
+ Run ID: run_20260401_001
105
+ Status: completed
106
+ Artifacts: artifacts/samples/run_20260401_001
107
+ ```
108
+
109
+ ## Features
110
+
111
+ ### Static Analysis
112
+ - ANTLR4-based parser (fixed + free format COBOL)
113
+ - COPYBOOK resolution with circular dependency detection
114
+ - Call graph builder and business rules extractor
115
+ - Control flow graph (CFG) with branch, perform, and fallthrough edges
116
+ - Field-level data flow analysis (MOVE, COMPUTE, READ INTO, WRITE FROM, CALL)
117
+ - Dead code detection: unreachable paragraphs, unused data items, dead branches
118
+ - Field reference indexer with read/write/condition classification
119
+ - Data item hierarchy with PIC, COMP-3, REDEFINES, OCCURS, level-88
120
+
121
+ ### LLM Explanation
122
+ - Multi-backend: Claude, OpenAI, Ollama, and local HuggingFace models
123
+ - Three modes: `technical`, `business`, `audit`
124
+ - Governance: audit logging, sensitivity classification, prompt redaction
125
+ - Policy enforcement, token budgets, retry/timeout
126
+ - Parallel processing with bounded concurrency
127
+ - File-based cache with composite keys
128
+
129
+ ### Change Impact Analysis
130
+ - "If I change field X, what breaks?"
131
+ - BFS traversal on reverse call graph
132
+ - Field reference scanning across ASTs and business rules
133
+ - Configurable depth limit
134
+
135
+ ### Output & Documentation
136
+ - Versioned JSON artifact contracts (Pydantic v2)
137
+ - Markdown + HTML report generation
138
+ - Self-contained HTML with sidebar nav, search, and Mermaid graphs
139
+ - Structured error codes for operational monitoring
140
+
141
+ ### Fine-Tuning
142
+ - Dataset builder: generates instruction-tuning pairs from pipeline output
143
+ - LoRA/PEFT fine-tuning script for CodeLlama-7B or similar (QLoRA supported)
144
+ - Local fine-tuned model backend for fully offline inference
145
+ - Prompt comparison benchmark: raw source vs structured pipeline prompts
146
+
147
+ ### API & Distribution
148
+ - Versioned REST API (`/api/v1/`) with OpenAPI docs and typed error responses
149
+ - Docker image + docker-compose with optional Ollama sidecar
150
+ - Cross-platform CI (Linux + Windows, Python 3.11 + 3.12)
151
+ - PyPI-ready wheel with a PEP 561 `py.typed` marker (inline type hints)
152
+
153
+ ## CLI Commands
154
+
155
+ | Command | Description |
156
+ |---------|-------------|
157
+ | `analyze` | Parse COBOL files, build AST, call graph, business rules |
158
+ | `explain` | Run analysis + LLM explanation |
159
+ | `graph` | Build dependency and call graph artifacts |
160
+ | `impact` | Analyze change impact from a completed run |
161
+ | `docs` | Generate documentation (Markdown or HTML) |
162
+
163
+ Global:
164
+
165
+ ```bash
166
+ cobol-intel --version # Show version
167
+ ```
168
+
169
+ Key flags:
170
+
171
+ ```bash
172
+ --model claude|openai|ollama|local # LLM backend
173
+ --mode technical|business|audit # Explanation style
174
+ --parallel # Enable parallel LLM processing
175
+ --max-workers N # Override concurrency limit
176
+ --cache / --no-cache # Explanation cache toggle
177
+ --strict-policy # Hard block policy violations
178
+ --max-tokens-per-run N # Token budget cap
179
+ --format markdown|html # Documentation format
180
+ ```
181
+
182
+ ## API Usage
183
+
184
+ ```bash
185
+ pip install "cobol-intel[api]"
186
+ cobol-intel-api # starts on port 8000
187
+
188
+ curl http://localhost:8000/api/v1/health
189
+ curl "http://localhost:8000/api/v1/runs?output_dir=artifacts"
190
+ curl http://localhost:8000/api/v1/version
191
+ ```
192
+
193
+ See [docs/API_GUIDE.md](docs/API_GUIDE.md) for full endpoint reference.
194
+
195
+ ## Output Artifacts
196
+
197
+ Each run produces a stable artifact tree:
198
+
199
+ ```
200
+ artifacts/<project>/<run_id>/
201
+ manifest.json # Run metadata, governance, errors
202
+ ast/ # Per-program AST JSON
203
+ graphs/ # Call graph JSON + Mermaid
204
+ rules/ # Business rules JSON + Markdown
205
+ analysis/ # CFG, data flow, dead code, references
206
+ docs/ # Explanations, documentation
207
+ logs/ # Audit event log
208
+ ```
209
+
210
+ See [docs/OUTPUT_GALLERY.md](docs/OUTPUT_GALLERY.md) for sample artifacts.
211
+
212
+ ## COBOL Subset Coverage
213
+
214
+ - Fixed-format and free-format COBOL
215
+ - `COPY`, circular copy detection, `COPY ... REPLACING`
216
+ - `WORKING-STORAGE`, `FILE`, `LINKAGE` sections
217
+ - `PROCEDURE DIVISION USING`
218
+ - `PIC`, `COMP-3`, `REDEFINES`, `OCCURS`, level-88 conditions
219
+ - `IF`, `EVALUATE`, `PERFORM`, `CALL`, `STRING`, `UNSTRING`, `INSPECT`
220
+ - File I/O: `OPEN`, `READ`, `WRITE`, `REWRITE`, `CLOSE`
221
+ - `EXEC SQL` subset for static-analysis context
222
+
223
+ ## Development
224
+
225
+ ```bash
226
+ git clone https://github.com/WwzFwz/cobol-intel.git
227
+ cd cobol-intel
228
+ pip install -e ".[api,dev]"
229
+
230
+ make lint # ruff + tach
231
+ make test # pytest
232
+ make bench # benchmark suite
233
+ make build # build wheel
234
+ ```
235
+
236
+ Offline inference and training extras:
237
+
238
+ ```bash
239
+ pip install -e ".[local]" # local HuggingFace backend
240
+ pip install -e ".[train]" # dataset + fine-tuning tooling
241
+ ```
242
+
243
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for full dev setup and guidelines.
244
+
245
+ ## Documentation
246
+
247
+ - [Architecture](docs/ARCHITECTURE.md)
248
+ - [Architecture Decisions](docs/DECISIONS.md)
249
+ - [API Guide](docs/API_GUIDE.md)
250
+ - [Output Gallery](docs/OUTPUT_GALLERY.md)
251
+ - [Fintech Readiness](docs/FINTECH_READINESS.md)
252
+ - [Parser Evaluation](docs/PARSER_EVALUATION.md)
253
+ - [Project Plan](docs/PLAN.md)
254
+ - [Progress](docs/PROGRESS.md)
255
+ - [Changelog](CHANGELOG.md)
256
+
257
+ ## License
258
+
259
+ MIT