k-extract 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. k_extract-0.1.0/.github/workflows/ci.yml +32 -0
  2. k_extract-0.1.0/.github/workflows/release.yml +124 -0
  3. k_extract-0.1.0/.gitignore +10 -0
  4. k_extract-0.1.0/.pre-commit-config.yaml +15 -0
  5. k_extract-0.1.0/.python-version +1 -0
  6. k_extract-0.1.0/CHANGELOG.md +7 -0
  7. k_extract-0.1.0/PKG-INFO +13 -0
  8. k_extract-0.1.0/README.md +0 -0
  9. k_extract-0.1.0/pyproject.toml +99 -0
  10. k_extract-0.1.0/scripts/loop.sh +6 -0
  11. k_extract-0.1.0/scripts/stats.sh +315 -0
  12. k_extract-0.1.0/specs/agent/agent-architecture.md +215 -0
  13. k_extract-0.1.0/specs/agent/agent-tools.md +234 -0
  14. k_extract-0.1.0/specs/agent/prompt-generation.md +112 -0
  15. k_extract-0.1.0/specs/agent/prompt-patterns.md +227 -0
  16. k_extract-0.1.0/specs/concurrency/concurrency-model.md +241 -0
  17. k_extract-0.1.0/specs/data-sources/data-source-config.md +162 -0
  18. k_extract-0.1.0/specs/data-sources/multi-source.md +38 -0
  19. k_extract-0.1.0/specs/decisions/technology-choices.md +94 -0
  20. k_extract-0.1.0/specs/domain/domain-model.md +235 -0
  21. k_extract-0.1.0/specs/index.md +53 -0
  22. k_extract-0.1.0/specs/lessons-learned/v1-to-v2-evolution.md +314 -0
  23. k_extract-0.1.0/specs/process/config-schema.md +151 -0
  24. k_extract-0.1.0/specs/process/extraction-pipeline.md +262 -0
  25. k_extract-0.1.0/specs/process/guided-session.md +102 -0
  26. k_extract-0.1.0/specs/process/job-lifecycle.md +121 -0
  27. k_extract-0.1.0/specs/process/output-format.md +95 -0
  28. k_extract-0.1.0/specs/prompts/implementation.md +197 -0
  29. k_extract-0.1.0/specs/prompts/process-revision.md +49 -0
  30. k_extract-0.1.0/specs/prompts/project-manager.md +104 -0
  31. k_extract-0.1.0/specs/prompts/verifier.md +89 -0
  32. k_extract-0.1.0/specs/reviews/.gitkeep +0 -0
  33. k_extract-0.1.0/specs/reviews/task-001.md +7 -0
  34. k_extract-0.1.0/specs/reviews/task-002.md +22 -0
  35. k_extract-0.1.0/specs/reviews/task-004.md +5 -0
  36. k_extract-0.1.0/specs/reviews/task-006.md +11 -0
  37. k_extract-0.1.0/specs/reviews/task-007.md +13 -0
  38. k_extract-0.1.0/specs/reviews/task-008.md +11 -0
  39. k_extract-0.1.0/specs/reviews/task-009.md +11 -0
  40. k_extract-0.1.0/specs/reviews/task-010.md +16 -0
  41. k_extract-0.1.0/specs/reviews/task-011.md +7 -0
  42. k_extract-0.1.0/specs/reviews/task-012.md +27 -0
  43. k_extract-0.1.0/specs/reviews/task-013.md +5 -0
  44. k_extract-0.1.0/specs/reviews/task-016.md +7 -0
  45. k_extract-0.1.0/specs/reviews/task-019.md +7 -0
  46. k_extract-0.1.0/specs/reviews/task-020.md +15 -0
  47. k_extract-0.1.0/specs/tasks/.gitkeep +0 -0
  48. k_extract-0.1.0/specs/tasks/task-001.md +75 -0
  49. k_extract-0.1.0/specs/tasks/task-002.md +71 -0
  50. k_extract-0.1.0/specs/tasks/task-003.md +61 -0
  51. k_extract-0.1.0/specs/tasks/task-004.md +59 -0
  52. k_extract-0.1.0/specs/tasks/task-005.md +70 -0
  53. k_extract-0.1.0/specs/tasks/task-006.md +75 -0
  54. k_extract-0.1.0/specs/tasks/task-007.md +79 -0
  55. k_extract-0.1.0/specs/tasks/task-008.md +63 -0
  56. k_extract-0.1.0/specs/tasks/task-009.md +65 -0
  57. k_extract-0.1.0/specs/tasks/task-010.md +82 -0
  58. k_extract-0.1.0/specs/tasks/task-011.md +72 -0
  59. k_extract-0.1.0/specs/tasks/task-012.md +84 -0
  60. k_extract-0.1.0/specs/tasks/task-013.md +58 -0
  61. k_extract-0.1.0/specs/tasks/task-014.md +52 -0
  62. k_extract-0.1.0/specs/tasks/task-015.md +64 -0
  63. k_extract-0.1.0/specs/tasks/task-016.md +66 -0
  64. k_extract-0.1.0/specs/tasks/task-017.md +63 -0
  65. k_extract-0.1.0/specs/tasks/task-018.md +74 -0
  66. k_extract-0.1.0/specs/tasks/task-019.md +75 -0
  67. k_extract-0.1.0/specs/tasks/task-020.md +91 -0
  68. k_extract-0.1.0/specs/tasks/task-021.md +39 -0
  69. k_extract-0.1.0/src/k_extract/__init__.py +0 -0
  70. k_extract-0.1.0/src/k_extract/cli/__init__.py +15 -0
  71. k_extract-0.1.0/src/k_extract/cli/display.py +109 -0
  72. k_extract-0.1.0/src/k_extract/cli/init.py +677 -0
  73. k_extract-0.1.0/src/k_extract/cli/jobs.py +191 -0
  74. k_extract-0.1.0/src/k_extract/cli/run.py +115 -0
  75. k_extract-0.1.0/src/k_extract/config/__init__.py +0 -0
  76. k_extract-0.1.0/src/k_extract/config/loader.py +67 -0
  77. k_extract-0.1.0/src/k_extract/config/schema.py +129 -0
  78. k_extract-0.1.0/src/k_extract/config/settings.py +49 -0
  79. k_extract-0.1.0/src/k_extract/domain/__init__.py +0 -0
  80. k_extract-0.1.0/src/k_extract/domain/entities.py +62 -0
  81. k_extract-0.1.0/src/k_extract/domain/mutations.py +155 -0
  82. k_extract-0.1.0/src/k_extract/domain/ontology.py +391 -0
  83. k_extract-0.1.0/src/k_extract/domain/relationships.py +70 -0
  84. k_extract-0.1.0/src/k_extract/extraction/__init__.py +0 -0
  85. k_extract-0.1.0/src/k_extract/extraction/agent.py +447 -0
  86. k_extract-0.1.0/src/k_extract/extraction/hooks.py +164 -0
  87. k_extract-0.1.0/src/k_extract/extraction/logging.py +93 -0
  88. k_extract-0.1.0/src/k_extract/extraction/models.py +76 -0
  89. k_extract-0.1.0/src/k_extract/extraction/prompts.py +181 -0
  90. k_extract-0.1.0/src/k_extract/extraction/store.py +845 -0
  91. k_extract-0.1.0/src/k_extract/extraction/templates/job_description.txt +7 -0
  92. k_extract-0.1.0/src/k_extract/extraction/templates/system_prompt.txt +58 -0
  93. k_extract-0.1.0/src/k_extract/extraction/tools.py +733 -0
  94. k_extract-0.1.0/src/k_extract/pipeline/__init__.py +0 -0
  95. k_extract-0.1.0/src/k_extract/pipeline/database.py +97 -0
  96. k_extract-0.1.0/src/k_extract/pipeline/defines.py +130 -0
  97. k_extract-0.1.0/src/k_extract/pipeline/fingerprint.py +212 -0
  98. k_extract-0.1.0/src/k_extract/pipeline/jobs.py +344 -0
  99. k_extract-0.1.0/src/k_extract/pipeline/orchestrator.py +585 -0
  100. k_extract-0.1.0/src/k_extract/pipeline/progress.py +208 -0
  101. k_extract-0.1.0/src/k_extract/pipeline/sources.py +318 -0
  102. k_extract-0.1.0/src/k_extract/pipeline/worker.py +210 -0
  103. k_extract-0.1.0/src/k_extract/pipeline/writer.py +56 -0
  104. k_extract-0.1.0/tests/__init__.py +0 -0
  105. k_extract-0.1.0/tests/cli/__init__.py +0 -0
  106. k_extract-0.1.0/tests/cli/test_cli.py +10 -0
  107. k_extract-0.1.0/tests/cli/test_display.py +176 -0
  108. k_extract-0.1.0/tests/cli/test_init.py +564 -0
  109. k_extract-0.1.0/tests/cli/test_jobs.py +649 -0
  110. k_extract-0.1.0/tests/config/__init__.py +0 -0
  111. k_extract-0.1.0/tests/config/test_loader.py +191 -0
  112. k_extract-0.1.0/tests/config/test_schema.py +404 -0
  113. k_extract-0.1.0/tests/config/test_settings.py +69 -0
  114. k_extract-0.1.0/tests/domain/__init__.py +0 -0
  115. k_extract-0.1.0/tests/domain/test_entities.py +111 -0
  116. k_extract-0.1.0/tests/domain/test_mutations.py +342 -0
  117. k_extract-0.1.0/tests/domain/test_ontology.py +924 -0
  118. k_extract-0.1.0/tests/domain/test_relationships.py +161 -0
  119. k_extract-0.1.0/tests/e2e/__init__.py +0 -0
  120. k_extract-0.1.0/tests/e2e/test_full_pipeline.py +259 -0
  121. k_extract-0.1.0/tests/extraction/__init__.py +0 -0
  122. k_extract-0.1.0/tests/extraction/templates/__init__.py +0 -0
  123. k_extract-0.1.0/tests/extraction/test_agent.py +1325 -0
  124. k_extract-0.1.0/tests/extraction/test_prompts.py +378 -0
  125. k_extract-0.1.0/tests/extraction/test_store.py +930 -0
  126. k_extract-0.1.0/tests/extraction/test_tools.py +1793 -0
  127. k_extract-0.1.0/tests/pipeline/__init__.py +0 -0
  128. k_extract-0.1.0/tests/pipeline/test_database.py +157 -0
  129. k_extract-0.1.0/tests/pipeline/test_defines.py +132 -0
  130. k_extract-0.1.0/tests/pipeline/test_fingerprint.py +272 -0
  131. k_extract-0.1.0/tests/pipeline/test_jobs.py +657 -0
  132. k_extract-0.1.0/tests/pipeline/test_orchestrator.py +776 -0
  133. k_extract-0.1.0/tests/pipeline/test_progress.py +316 -0
  134. k_extract-0.1.0/tests/pipeline/test_sources.py +395 -0
  135. k_extract-0.1.0/tests/pipeline/test_worker.py +734 -0
  136. k_extract-0.1.0/tests/pipeline/test_writer.py +179 -0
  137. k_extract-0.1.0/uv.lock +1043 -0
@@ -0,0 +1,32 @@
1
+ name: CI
2
+
3
+ on:
4
+ pull_request:
5
+ branches: [main]
6
+
7
+ jobs:
8
+ lint-and-test:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v4
12
+
13
+ - name: Install uv
14
+ uses: astral-sh/setup-uv@v5
15
+
16
+ - name: Set up Python
17
+ run: uv python install 3.12
18
+
19
+ - name: Install dependencies
20
+ run: uv sync --dev
21
+
22
+ - name: Lint
23
+ run: uv run ruff check src/ tests/
24
+
25
+ - name: Format check
26
+ run: uv run ruff format --check src/ tests/
27
+
28
+ - name: Type check
29
+ run: uv run pyright
30
+
31
+ - name: Test
32
+ run: uv run pytest
@@ -0,0 +1,124 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ permissions:
8
+ contents: read
9
+
10
+ jobs:
11
+ # -----------------------------------------------------------
12
+ # Job 1: Determine version, tag, and create GitHub Release
13
+ # -----------------------------------------------------------
14
+ release:
15
+ runs-on: ubuntu-latest
16
+ if: github.repository == 'jsell-rh/k-extract'
17
+
18
+ permissions:
19
+ contents: write
20
+ id-token: write
21
+
22
+ outputs:
23
+ released: ${{ steps.semrel.outputs.released }}
24
+ version: ${{ steps.semrel.outputs.version }}
25
+ tag: ${{ steps.semrel.outputs.tag }}
26
+
27
+ steps:
28
+ - name: Checkout
29
+ uses: actions/checkout@v4
30
+ with:
31
+ fetch-depth: 0
32
+ persist-credentials: false
33
+
34
+ - name: Configure Git credentials
35
+ run: |
36
+ git config user.name "github-actions[bot]"
37
+ git config user.email "github-actions[bot]@users.noreply.github.com"
38
+
39
+ - name: Python Semantic Release
40
+ id: semrel
41
+ uses: python-semantic-release/python-semantic-release@v10
42
+ with:
43
+ github_token: ${{ secrets.GITHUB_TOKEN }}
44
+ root_options: "-vv"
45
+
46
+ # -----------------------------------------------------------
47
+ # Job 2: Build distribution artifacts
48
+ # -----------------------------------------------------------
49
+ build:
50
+ needs: release
51
+ if: needs.release.outputs.released == 'true'
52
+ runs-on: ubuntu-latest
53
+
54
+ steps:
55
+ - name: Checkout released tag
56
+ uses: actions/checkout@v4
57
+ with:
58
+ ref: ${{ needs.release.outputs.tag }}
59
+
60
+ - name: Install uv
61
+ uses: astral-sh/setup-uv@v5
62
+
63
+ - name: Set up Python
64
+ run: uv python install 3.12
65
+
66
+ - name: Build package
67
+ run: uv build
68
+
69
+ - name: Upload dist artifacts
70
+ uses: actions/upload-artifact@v4
71
+ with:
72
+ name: dist
73
+ path: dist/
74
+ if-no-files-found: error
75
+
76
+ # -----------------------------------------------------------
77
+ # Job 3: Publish to PyPI using OIDC Trusted Publishing
78
+ # -----------------------------------------------------------
79
+ publish-pypi:
80
+ needs: [release, build]
81
+ if: needs.release.outputs.released == 'true'
82
+ runs-on: ubuntu-latest
83
+
84
+ environment:
85
+ name: pypi
86
+ url: https://pypi.org/project/k-extract/${{ needs.release.outputs.version }}/
87
+
88
+ permissions:
89
+ id-token: write
90
+
91
+ steps:
92
+ - name: Download dist artifacts
93
+ uses: actions/download-artifact@v4
94
+ with:
95
+ name: dist
96
+ path: dist/
97
+
98
+ - name: Publish to PyPI
99
+ uses: pypa/gh-action-pypi-publish@release/v1
100
+
101
+ # -----------------------------------------------------------
102
+ # Job 4: Upload artifacts to the GitHub Release
103
+ # -----------------------------------------------------------
104
+ publish-github-release:
105
+ needs: [release, build]
106
+ if: needs.release.outputs.released == 'true'
107
+ runs-on: ubuntu-latest
108
+
109
+ permissions:
110
+ contents: write
111
+ id-token: write
112
+
113
+ steps:
114
+ - name: Download dist artifacts
115
+ uses: actions/download-artifact@v4
116
+ with:
117
+ name: dist
118
+ path: dist/
119
+
120
+ - name: Upload to GitHub Release
121
+ uses: python-semantic-release/publish-action@v10
122
+ with:
123
+ github_token: ${{ secrets.GITHUB_TOKEN }}
124
+ tag: ${{ needs.release.outputs.tag }}
@@ -0,0 +1,10 @@
1
+ extraction.yaml
2
+ __pycache__/
3
+ *.pyc
4
+ *.egg-info/
5
+ dist/
6
+ .pytest_cache/
7
+ graph.jsonl
8
+ extraction.db
9
+ extraction.db*
10
+ logs/
@@ -0,0 +1,15 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.11.6
4
+ hooks:
5
+ - id: ruff
6
+ args: [--fix]
7
+ - id: ruff-format
8
+ - repo: local
9
+ hooks:
10
+ - id: pyright
11
+ name: pyright
12
+ entry: uv run pyright
13
+ language: system
14
+ types: [python]
15
+ pass_filenames: false
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,7 @@
1
+ # CHANGELOG
2
+
3
+ <!-- version list -->
4
+
5
+ ## v0.1.0 (2026-04-09)
6
+
7
+ - Initial Release
@@ -0,0 +1,13 @@
1
+ Metadata-Version: 2.4
2
+ Name: k-extract
3
+ Version: 0.1.0
4
+ Summary: General-purpose knowledge graph extraction framework
5
+ Requires-Python: >=3.12
6
+ Requires-Dist: claude-agent-sdk>=0.1.56
7
+ Requires-Dist: click>=8.1
8
+ Requires-Dist: pathspec>=0.12
9
+ Requires-Dist: pydantic-settings>=2.0
10
+ Requires-Dist: pyyaml>=6.0
11
+ Requires-Dist: rich>=13.0
12
+ Requires-Dist: sqlalchemy>=2.0
13
+ Requires-Dist: structlog>=24.0
File without changes
@@ -0,0 +1,99 @@
1
+ [project]
2
+ name = "k-extract"
3
+ version = "0.1.0"
4
+ description = "General-purpose knowledge graph extraction framework"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "claude-agent-sdk>=0.1.56",
9
+ "click>=8.1",
10
+ "rich>=13.0",
11
+ "sqlalchemy>=2.0",
12
+ "pydantic-settings>=2.0",
13
+ "structlog>=24.0",
14
+ "pyyaml>=6.0",
15
+ "pathspec>=0.12",
16
+ ]
17
+
18
+ [dependency-groups]
19
+ dev = [
20
+ "pytest>=8.0",
21
+ "ruff>=0.4",
22
+ "pyright>=1.1",
23
+ "pre-commit>=3.0",
24
+ "pytest-asyncio>=1.3.0",
25
+ ]
26
+
27
+ [project.scripts]
28
+ k-extract = "k_extract.cli:main"
29
+
30
+ [build-system]
31
+ requires = ["hatchling"]
32
+ build-backend = "hatchling.build"
33
+
34
+ [tool.hatch.build.targets.wheel]
35
+ packages = ["src/k_extract"]
36
+
37
+ [tool.pytest.ini_options]
38
+ markers = [
39
+ "e2e: end-to-end integration tests requiring a real Claude API key (deselected by default)",
40
+ ]
41
+
42
+ [tool.ruff]
43
+ src = ["src"]
44
+ target-version = "py312"
45
+
46
+ [tool.ruff.lint]
47
+ select = ["E", "F", "I", "UP", "B", "SIM"]
48
+
49
+ [tool.pyright]
50
+ pythonVersion = "3.12"
51
+ venvPath = "."
52
+ venv = ".venv"
53
+ include = ["src"]
54
+ typeCheckingMode = "standard"
55
+
56
+ # ---------------------------------------------------------------------------
57
+ # Semantic Release
58
+ # ---------------------------------------------------------------------------
59
+ [tool.semantic_release]
60
+ commit_parser = "conventional"
61
+ version_toml = ["pyproject.toml:project.version"]
62
+ allow_zero_version = true
63
+ major_on_zero = false
64
+
65
+ [tool.semantic_release.branches.main]
66
+ match = "main"
67
+
68
+ [tool.semantic_release.publish]
69
+ upload_to_vcs_release = false
70
+
71
+ [tool.semantic_release.commit_author]
72
+ env = "GIT_COMMIT_AUTHOR"
73
+ default = "github-actions[bot] <github-actions[bot]@users.noreply.github.com>"
74
+
75
+ [tool.semantic_release.commit_parser_options]
76
+ minor_tags = ["feat"]
77
+ patch_tags = ["fix", "perf"]
78
+ parse_squash_commits = true
79
+ ignore_merge_commits = true
80
+
81
+ [tool.semantic_release.changelog]
82
+ exclude_commit_patterns = [
83
+ '''chore(?:\([^)]*?\))?: .+''',
84
+ '''ci(?:\([^)]*?\))?: .+''',
85
+ '''refactor(?:\([^)]*?\))?: .+''',
86
+ '''style(?:\([^)]*?\))?: .+''',
87
+ '''test(?:\([^)]*?\))?: .+''',
88
+ '''docs(?:\([^)]*?\))?: .+''',
89
+ '''build(?:\([^)]*?\))?: .+''',
90
+ '''review(?:\([^)]*?\))?: .+''',
91
+ ]
92
+
93
+ [tool.semantic_release.changelog.default_templates]
94
+ changelog_file = "CHANGELOG.md"
95
+ output_format = "md"
96
+
97
+ [tool.semantic_release.remote]
98
+ type = "github"
99
+ token = { env = "GH_TOKEN" }
@@ -0,0 +1,6 @@
1
#!/usr/bin/env bash
# Agent build loop: feeds each stage prompt to the `claude` CLI, in order,
# forever. Prompt paths are relative, so run this from the repository root.
# Stop it with Ctrl-C.
set -u

# The model name contains glob metacharacters ([ ]). Unquoted, the shell would
# try to expand it against files in the current directory (matching e.g.
# "opus1" or "opusm"), so it is kept in a quoted constant.
readonly MODEL='opus[1m]'

# Stage prompts under specs/prompts/, in execution order.
readonly -a STAGES=(project-manager implementation verifier process-revision)

# Without the CLI every iteration fails instantly and the loop would spin
# forever printing "command not found"; bail out once instead.
if ! command -v claude >/dev/null 2>&1; then
  echo "loop.sh: 'claude' CLI not found on PATH" >&2
  exit 127
fi

while true; do
  for stage in "${STAGES[@]}"; do
    # A failing stage does not stop the cycle; the next stage still runs.
    claude --model "$MODEL" --dangerously-skip-permissions < "specs/prompts/${stage}.md"
  done
done
@@ -0,0 +1,315 @@
1
+ #!/usr/bin/env bash
2
+ # k-extract Build Stats — tracks velocity, quality, and cost metrics across tasks.
3
+ # Usage: ./scripts/stats.sh [--json]
4
+ set -uo pipefail
5
+
6
+ REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
7
+ TASKS_DIR="$REPO_ROOT/specs/tasks"
8
+ REVIEWS_DIR="$REPO_ROOT/specs/reviews"
9
+ JSON_MODE=false
10
+ [[ "${1:-}" == "--json" ]] && JSON_MODE=true
11
+
12
# --- Colors (disabled for JSON or non-tty) ---

# setup_colors ENABLED
#   Define the ANSI styling globals used by the report (BOLD, DIM, RESET,
#   GREEN, YELLOW, RED, CYAN, BLUE, MAGENTA).
#   ENABLED: "true" assigns the escape sequences; any other value assigns
#            empty strings so the globals can be interpolated unconditionally.
setup_colors() {
  local enabled=$1
  if [[ "$enabled" == true ]]; then
    BOLD=$'\033[1m' DIM=$'\033[2m' RESET=$'\033[0m'
    GREEN=$'\033[32m' YELLOW=$'\033[33m' RED=$'\033[31m'
    CYAN=$'\033[36m' BLUE=$'\033[34m' MAGENTA=$'\033[35m'
  else
    BOLD="" DIM="" RESET="" GREEN="" YELLOW="" RED="" CYAN="" BLUE="" MAGENTA=""
  fi
}

# Colour only when a human is reading a terminal: JSON output must stay
# machine-readable, and piped/redirected output should not carry escape codes.
if [[ "${JSON_MODE:-false}" == true || ! -t 1 ]]; then
  setup_colors false
else
  setup_colors true
fi
20
+
21
# --- Task status counts ---
total_tasks=0; complete=0; in_review=0; in_progress=0; not_started=0
declare -a task_names=() task_statuses=() task_titles=()

# extract_task_status FILE
#   Print the first backtick-quoted value that follows a "**Status:**" or
#   "**Progress:**" marker in FILE, or "unknown" when FILE has no such marker
#   (or cannot be read).
#   Uses plain ERE instead of a PCRE lookbehind: the two marker words differ in
#   length, and a variable-length lookbehind is rejected by PCRE builds that
#   require fixed-length lookbehind (and `grep -P` is missing from BSD grep).
extract_task_status() {
  local file=$1 marker
  marker=$(grep -oE '\*\*(Status|Progress):\*\* `[^`]+' "$file" 2>/dev/null | head -n 1)
  if [[ -n "$marker" ]]; then
    # Keep only what follows the opening backtick.
    printf '%s\n' "${marker#*\`}"
  else
    echo "unknown"
  fi
}

# format_task_title FILE
#   Print the first line of FILE with a leading "# TASK-<n>: " / "# Task <n>: "
#   prefix removed, em-dashes replaced by "-", and anything longer than
#   38 characters shortened to its first 18 + ".." + last 18 characters.
format_task_title() {
  local file=$1 title
  title=$(head -n 1 "$file" | sed -E 's/^# (TASK-[0-9]+|Task [0-9]+): //')
  # Replace em-dashes with plain dashes for consistent column width
  title="${title//—/-}"
  if (( ${#title} > 38 )); then
    title="${title:0:18}..${title: -18}"
  fi
  printf '%s\n' "$title"
}

for f in "$TASKS_DIR"/task-*.md; do
  # An unmatched glob stays literal; skip it.
  [[ -f "$f" ]] || continue
  total_tasks=$((total_tasks + 1))
  name=$(basename "$f" .md)
  status=$(extract_task_status "$f")
  title=$(format_task_title "$f")
  task_names+=("$name")
  task_statuses+=("$status")
  task_titles+=("$title")
  # Statuses outside these four buckets (e.g. "unknown") only count
  # towards total_tasks.
  case "$status" in
    complete) complete=$((complete + 1)) ;;
    ready-for-review|in-review) in_review=$((in_review + 1)) ;;
    in-progress) in_progress=$((in_progress + 1)) ;;
    not-started) not_started=$((not_started + 1)) ;;
  esac
done
47
+
48
# --- Review metrics per task ---
declare -A review_rounds=() review_findings=()

# count_matches PATTERN FILE
#   Print the number of lines in FILE matching PATTERN; prints 0 when nothing
#   matches or grep produces no output at all.
count_matches() {
  local pattern=$1 file=$2 n
  n=$(grep -c "$pattern" "$file" 2>/dev/null || true)
  printf '%s\n' "${n:-0}"
}

for f in "$REVIEWS_DIR"/task-*.md; do
  if [[ ! -f "$f" ]]; then
    continue
  fi
  name=${f##*/}
  name=${name%.md}

  # A review round is any of the three heading spellings seen in review files.
  rounds=0
  for heading in '^## Round [0-9]' '^## R[0-9]' '^## Findings'; do
    rounds=$((rounds + $(count_matches "$heading" "$f")))
  done

  # Findings are counted as lines mentioning the process-revision marker.
  findings=$(count_matches 'process-revision-complete' "$f")

  review_rounds[$name]=$rounds
  review_findings[$name]=$findings
done
63
+
64
+ # --- Wall clock time per task (from git history) ---
65
+ OUTLIER_GAP=3600 # 1 hour in seconds
66
+
67
+ declare -A task_wall_clock=() task_first_commit=() task_last_commit=()
68
+ declare -A task_active_seconds=()
69
+ declare -A task_commits=()
70
+
71
# compute_task_time TASK_ID
#   Collect timing data for one task from the git history of $REPO_ROOT.
#   Every commit (on any ref) whose message matches TASK_ID contributes its
#   author timestamp. Results are stored in the global associative arrays,
#   keyed by TASK_ID:
#     task_commits         number of matching commits
#     task_first_commit    first timestamp in git's --reverse order
#     task_last_commit     last timestamp in that order
#     task_wall_clock      last - first, in seconds
#     task_active_seconds  sum of gaps between consecutive commits that are
#                          positive and shorter than $OUTLIER_GAP
#   Nothing is recorded when no commit matches. A single matching commit is
#   recorded with zero wall-clock/active time (previously it was dropped and
#   the task showed 0 commits).
#   NOTE(review): --grep is case-sensitive and callers pass the lowercase file
#   stem (e.g. "task-001") — confirm commit messages use that spelling.
compute_task_time() {
  local task_id="$1"
  local stamps
  stamps=$(git -C "$REPO_ROOT" log --all --format="%at" --grep="$task_id" --reverse 2>/dev/null)
  [[ -z "$stamps" ]] && return 0

  local ts gap first="" last="" prev="" active_time=0 count=0
  while IFS= read -r ts; do
    [[ -z "$ts" ]] && continue
    if (( count == 0 )); then
      first=$ts
    else
      gap=$((ts - prev))
      # Long pauses (and out-of-order timestamps) are not active work time.
      if (( gap > 0 && gap < OUTLIER_GAP )); then
        active_time=$((active_time + gap))
      fi
    fi
    last=$ts
    prev=$ts
    count=$((count + 1))
  done <<< "$stamps"

  (( count > 0 )) || return 0

  task_first_commit[$task_id]=$first
  task_last_commit[$task_id]=$last
  task_wall_clock[$task_id]=$((last - first))
  task_active_seconds[$task_id]=$active_time
  task_commits[$task_id]=$count
}
101
+
102
+ for name in "${task_names[@]}"; do
103
+ compute_task_time "$name"
104
+ done
105
+
106
# --- Code metrics ---

# count_py_lines DIR [FIND_EXPR...]
#   Print the total number of lines in all regular files below DIR that match
#   FIND_EXPR (a find(1) expression; group alternatives with "(" ")").
#   Directories named .git or .venv are pruned so VCS internals and installed
#   packages are never counted. Prints 0 when DIR is empty or not a directory.
count_py_lines() {
  local dir=$1
  shift
  if [[ -z "$dir" || ! -d "$dir" ]]; then
    echo 0
    return 0
  fi
  # -exec cat {} + concatenates every match, so `wc -l` yields one grand total
  # regardless of how many files (or xargs-style batches) there are.
  find "$dir" \( -name .git -o -name .venv \) -prune -o \
    -type f "$@" -exec cat {} + 2>/dev/null | wc -l | awk '{print $1}'
}

# collect_code_metrics
#   Populate the global metrics read by the report:
#     prod_py_lines       lines of non-test *.py under $REPO_ROOT/src
#     test_py_lines       lines of test_*.py / *_test.py anywhere in the repo
#     total_py_lines      prod_py_lines + test_py_lines
#     total_commits       commits reachable from any ref
#     first_commit_ts / last_commit_ts / total_wall_seconds
#     checklist_items     commits whose message matches "fix(process)"
#   Every value falls back to 0 when git or the directories are unavailable.
collect_code_metrics() {
  local root=${REPO_ROOT:-}
  local -a test_files=( '(' -name 'test_*.py' -o -name '*_test.py' ')' )
  local src_all src_tests

  src_all=$(count_py_lines "${root:+$root/src}" -name '*.py')
  src_tests=$(count_py_lines "${root:+$root/src}" "${test_files[@]}")
  test_py_lines=$(count_py_lines "$root" "${test_files[@]}")
  # Production code is what lives under src/ minus any tests kept there, and
  # the total is the sum of both kinds, so prod can never go negative and the
  # test ratio stays within 0-100%.
  prod_py_lines=$((src_all - src_tests))
  total_py_lines=$((prod_py_lines + test_py_lines))

  total_commits=$(git -C "$root" log --all --oneline 2>/dev/null | wc -l | awk '{print $1}')
  total_commits=${total_commits:-0}

  # Overall timeline
  first_commit_ts=$(git -C "$root" log --all --format="%at" --reverse 2>/dev/null | head -n 1)
  last_commit_ts=$(git -C "$root" log --all --format="%at" 2>/dev/null | head -n 1)
  first_commit_ts=${first_commit_ts:-0}
  last_commit_ts=${last_commit_ts:-0}
  total_wall_seconds=$((last_commit_ts - first_commit_ts))

  # Checklist items (process learning)
  checklist_items=$(git -C "$root" log --all --oneline --grep="fix(process)" 2>/dev/null | wc -l | awk '{print $1}')
  checklist_items=${checklist_items:-0}
}

collect_code_metrics
122
+
123
+ # --- Format helpers ---
124
# fmt_duration SECONDS
#   Print SECONDS as "Hh Mm Ss", or "Mm Ss" when shorter than one hour.
#   A missing or empty argument is treated as 0 (callers pass the value
#   unquoted, so an empty variable would otherwise trip `set -u`), and negative
#   spans are clamped to 0.
fmt_duration() {
  local secs=${1:-0}
  if (( secs < 0 )); then
    secs=0
  fi
  local hrs=$((secs / 3600))
  local mins=$(( (secs % 3600) / 60 ))
  local s=$((secs % 60))
  if (( hrs > 0 )); then
    printf "%dh %dm %ds" "$hrs" "$mins" "$s"
  else
    printf "%dm %ds" "$mins" "$s"
  fi
}
135
+
136
# _repeat_char CHAR COUNT
#   Print CHAR repeated COUNT times (nothing when COUNT <= 0).
_repeat_char() {
  local char=$1 count=$2 pad
  (( count > 0 )) || return 0
  printf -v pad '%*s' "$count" ''
  printf '%s' "${pad// /$char}"
}

# progress_bar TOTAL N_COMPLETE N_REVIEW N_PROGRESS N_NOT_STARTED
#   Print a 30-cell bar without a trailing newline: "#" cells for complete
#   (GREEN), in-review (MAGENTA) and in-progress (BLUE) tasks, each scaled to
#   its share of TOTAL, then "-" cells (DIM) for whatever width is left,
#   followed by RESET. N_NOT_STARTED is accepted for call compatibility; the
#   remainder segment is derived from the width so the bar is always full.
#   TOTAL=0 yields an all-"-" bar instead of dividing by zero, and no loop
#   counter leaks into the caller's scope.
progress_bar() {
  local total=$1 n_complete=$2 n_review=$3 n_progress=$4 n_not_started=$5
  local width=30
  local w_complete=0 w_review=0 w_progress=0 w_not_started

  if (( total > 0 )); then
    w_complete=$((n_complete * width / total))
    w_review=$((n_review * width / total))
    w_progress=$((n_progress * width / total))
  fi
  w_not_started=$((width - w_complete - w_review - w_progress))

  printf "%s%s" "${GREEN}" "$(_repeat_char '#' "$w_complete")"
  printf "%s%s" "${MAGENTA}" "$(_repeat_char '#' "$w_review")"
  printf "%s%s" "${BLUE}" "$(_repeat_char '#' "$w_progress")"
  printf "%s%s%s" "${DIM}" "$(_repeat_char '-' "$w_not_started")" "${RESET}"
}
157
+
158
# --- JSON output ---

# emit_json_report
#   Write the report as a JSON object on stdout: a "summary" object built from
#   the global counters, and a "tasks" array with one entry per task_names
#   element (status, commits, review rounds, findings, active and wall-clock
#   seconds; per-task values missing from the lookup arrays default to 0).
#   progress_pct is 0 when there are no tasks rather than dividing by zero.
#   test_ratio is computed with integer arithmetic, truncated to one decimal.
emit_json_report() {
  local progress_pct=0 ratio_tenths=0
  if (( total_tasks > 0 )); then
    progress_pct=$((complete * 100 / total_tasks))
  fi
  if (( total_py_lines > 0 )); then
    ratio_tenths=$((test_py_lines * 1000 / total_py_lines))
  fi

  echo "{"
  echo " \"summary\": {"
  echo " \"total_tasks\": $total_tasks,"
  echo " \"complete\": $complete,"
  echo " \"in_review\": $in_review,"
  echo " \"in_progress\": $in_progress,"
  echo " \"not_started\": $not_started,"
  echo " \"progress_pct\": $progress_pct,"
  echo " \"total_commits\": $total_commits,"
  echo " \"prod_lines\": $prod_py_lines,"
  echo " \"test_lines\": $test_py_lines,"
  printf ' "test_ratio": "%d.%d%%",\n' "$((ratio_tenths / 10))" "$((ratio_tenths % 10))"
  echo " \"total_wall_clock_seconds\": $total_wall_seconds,"
  echo " \"checklist_items\": $checklist_items"
  echo " },"
  echo " \"tasks\": ["

  local i name status sep=""
  for i in "${!task_names[@]}"; do
    name="${task_names[$i]}"
    status="${task_statuses[$i]}"
    # The status is free text taken from a task file; escape the two
    # characters that would break out of a JSON string.
    status=${status//\\/\\\\}
    status=${status//\"/\\\"}
    printf '%s {"task": "%s", "status": "%s", "commits": %d, "review_rounds": %d, "findings": %d, "active_seconds": %d, "wall_seconds": %d}' \
      "$sep" "$name" "$status" \
      "${task_commits[$name]:-0}" "${review_rounds[$name]:-0}" "${review_findings[$name]:-0}" \
      "${task_active_seconds[$name]:-0}" "${task_wall_clock[$name]:-0}"
    sep=$',\n'
  done
  echo ""
  echo " ]"
  echo "}"
}

if [[ "${JSON_MODE:-false}" == true ]]; then
  emit_json_report
  exit 0
fi
199
+
200
+ # --- Human output ---
201
+ echo ""
202
+ echo "${BOLD}k-extract Build Stats${RESET}"
203
+ echo "${DIM}$(date '+%Y-%m-%d %H:%M')${RESET}"
204
+ echo ""
205
+
206
+ # Progress
207
+ if [[ $total_tasks -gt 0 ]]; then
208
+ pct=$((complete * 100 / total_tasks))
209
+ echo "${BOLD}Progress${RESET}"
210
+ printf " [$(progress_bar $total_tasks $complete $in_review $in_progress $not_started)] %d%% (%d/%d tasks)\n" "$pct" "$complete" "$total_tasks"
211
+ echo " ${GREEN}$complete complete${RESET} ${MAGENTA}$in_review in review${RESET} ${BLUE}$in_progress in progress${RESET} ${DIM}$not_started not started${RESET}"
212
+ else
213
+ echo "${BOLD}Progress${RESET}"
214
+ echo " ${DIM}No tasks found in $TASKS_DIR${RESET}"
215
+ fi
216
+ echo ""
217
+
218
+ # Timeline
219
+ echo "${BOLD}Timeline${RESET}"
220
+ echo " Total wall clock: $(fmt_duration $total_wall_seconds)"
221
+ echo " Total commits: $total_commits"
222
+ echo " Checklist items: $checklist_items (accumulated process learnings)"
223
+ echo ""
224
+
225
+ # Code
226
+ echo "${BOLD}Code${RESET}"
227
+ echo " Production: $prod_py_lines lines"
228
+ echo " Test: $test_py_lines lines"
229
+ if [[ $total_py_lines -gt 0 ]]; then
230
+ echo " Test ratio: $(printf '%.0f' "$(echo "scale=1; $test_py_lines * 100 / $total_py_lines" | bc)")% of total"
231
+ fi
232
+ if [[ $total_wall_seconds -gt 0 && $prod_py_lines -gt 0 ]]; then
233
+ echo " Throughput: ~$(( prod_py_lines * 3600 / total_wall_seconds )) prod lines/hr"
234
+ fi
235
+ echo ""
236
+
237
+ # Per-task breakdown
238
+ if [[ $total_tasks -gt 0 ]]; then
239
+ echo "${BOLD}Task Breakdown${RESET}"
240
+ printf " ${DIM}%-8s %-38s %-16s %7s %6s %8s %14s${RESET}\n" "TASK" "TITLE" "STATUS" "COMMITS" "ROUNDS" "FINDINGS" "ACTIVE TIME"
241
+ printf " %s%s%s\n" "${DIM}" "------------------------------------------------------------------------------------------------------" "${RESET}"
242
+
243
+ total_rounds=0; total_findings=0; total_active=0; total_task_commits=0
244
+ for i in "${!task_names[@]}"; do
245
+ name="${task_names[$i]}"
246
+ status="${task_statuses[$i]}"
247
+ rounds=${review_rounds[$name]:-0}
248
+ findings=${review_findings[$name]:-0}
249
+ active=${task_active_seconds[$name]:-0}
250
+ commits=${task_commits[$name]:-0}
251
+
252
+ total_rounds=$((total_rounds + rounds))
253
+ total_findings=$((total_findings + findings))
254
+ total_active=$((total_active + active))
255
+ total_task_commits=$((total_task_commits + commits))
256
+
257
+ case "$status" in
258
+ complete) status_label="complete"; status_color="$GREEN" ;;
259
+ ready-for-review) status_label="ready-for-review"; status_color="$MAGENTA" ;;
260
+ needs-revision) status_label="needs-revision"; status_color="$YELLOW" ;;
261
+ in-review) status_label="in-review"; status_color="$YELLOW" ;;
262
+ in-progress) status_label="in-progress"; status_color="$BLUE" ;;
263
+ not-started) status_label="not-started"; status_color="$DIM" ;;
264
+ *) status_label="$status"; status_color="" ;;
265
+ esac
266
+ sc="${status_color}$(printf '%-16s' "$status_label")${RESET}"
267
+
268
+ rpad=$(printf "%6d" "$rounds")
269
+ if [[ $rounds -eq 0 ]]; then
270
+ rc="${DIM}${rpad}${RESET}"
271
+ elif [[ $rounds -le 3 ]]; then
272
+ rc="${GREEN}${rpad}${RESET}"
273
+ elif [[ $rounds -le 7 ]]; then
274
+ rc="${YELLOW}${rpad}${RESET}"
275
+ else
276
+ rc="${RED}${rpad}${RESET}"
277
+ fi
278
+
279
+ fpad=$(printf "%8d" "$findings")
280
+
281
+ if [[ $active -gt 0 ]]; then
282
+ at=$(printf "%14s" "$(fmt_duration $active)")
283
+ else
284
+ at="$(printf '%12s' '')${DIM}--${RESET}"
285
+ fi
286
+
287
+ cpad=$(printf "%7d" "$commits")
288
+ title="${task_titles[$i]}"
289
+ printf " %-8s %-38s %s %s %s %s %s\n" "$name" "$title" "$sc" "$cpad" "$rc" "$fpad" "$at"
290
+ done
291
+
292
+ printf " %s%s%s\n" "${DIM}" "------------------------------------------------------------------------------------------------------" "${RESET}"
293
+ printf " ${BOLD}%-8s %-38s %-16s %7d %6d %8d %14s${RESET}\n" "TOTAL" "" "" "$total_task_commits" "$total_rounds" "$total_findings" "$(fmt_duration $total_active)"
294
+ echo ""
295
+
296
# Review efficiency

# print_review_efficiency
#   Print the "Review Efficiency" section from the globals total_findings,
#   total_rounds and complete, followed by the three tasks with the most
#   review rounds. Prints nothing when total_findings is 0.
#   Ratios use integer arithmetic truncated to one decimal. "Findings per
#   round" reports n/a when no review round was recorded, since findings and
#   rounds are counted from different markers and rounds can be 0.
print_review_efficiency() {
  (( total_findings > 0 )) || return 0

  local tenths name r f
  echo "${BOLD}Review Efficiency${RESET}"
  echo " Total findings: $total_findings"
  echo " Total review rounds: $total_rounds"
  if (( total_rounds > 0 )); then
    tenths=$((total_findings * 10 / total_rounds))
    echo " Findings per round: $((tenths / 10)).$((tenths % 10))"
  else
    echo " Findings per round: n/a (no review rounds recorded)"
  fi
  if (( complete > 0 )); then
    tenths=$((total_rounds * 10 / complete))
    echo " Avg rounds per task: $((tenths / 10)).$((tenths % 10)) (completed tasks only)"
  fi
  echo ""

  echo " ${DIM}Highest review effort:${RESET}"
  for name in "${task_names[@]}"; do
    r=${review_rounds[$name]:-0}
    f=${review_findings[$name]:-0}
    if (( r > 0 )); then
      echo " $r rounds, $f findings $name"
    fi
  done | sort -rn | head -n 3
  echo ""
}

print_review_efficiency
315
+ fi