k-extract 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- k_extract-0.1.0/.github/workflows/ci.yml +32 -0
- k_extract-0.1.0/.github/workflows/release.yml +124 -0
- k_extract-0.1.0/.gitignore +10 -0
- k_extract-0.1.0/.pre-commit-config.yaml +15 -0
- k_extract-0.1.0/.python-version +1 -0
- k_extract-0.1.0/CHANGELOG.md +7 -0
- k_extract-0.1.0/PKG-INFO +13 -0
- k_extract-0.1.0/README.md +0 -0
- k_extract-0.1.0/pyproject.toml +99 -0
- k_extract-0.1.0/scripts/loop.sh +6 -0
- k_extract-0.1.0/scripts/stats.sh +315 -0
- k_extract-0.1.0/specs/agent/agent-architecture.md +215 -0
- k_extract-0.1.0/specs/agent/agent-tools.md +234 -0
- k_extract-0.1.0/specs/agent/prompt-generation.md +112 -0
- k_extract-0.1.0/specs/agent/prompt-patterns.md +227 -0
- k_extract-0.1.0/specs/concurrency/concurrency-model.md +241 -0
- k_extract-0.1.0/specs/data-sources/data-source-config.md +162 -0
- k_extract-0.1.0/specs/data-sources/multi-source.md +38 -0
- k_extract-0.1.0/specs/decisions/technology-choices.md +94 -0
- k_extract-0.1.0/specs/domain/domain-model.md +235 -0
- k_extract-0.1.0/specs/index.md +53 -0
- k_extract-0.1.0/specs/lessons-learned/v1-to-v2-evolution.md +314 -0
- k_extract-0.1.0/specs/process/config-schema.md +151 -0
- k_extract-0.1.0/specs/process/extraction-pipeline.md +262 -0
- k_extract-0.1.0/specs/process/guided-session.md +102 -0
- k_extract-0.1.0/specs/process/job-lifecycle.md +121 -0
- k_extract-0.1.0/specs/process/output-format.md +95 -0
- k_extract-0.1.0/specs/prompts/implementation.md +197 -0
- k_extract-0.1.0/specs/prompts/process-revision.md +49 -0
- k_extract-0.1.0/specs/prompts/project-manager.md +104 -0
- k_extract-0.1.0/specs/prompts/verifier.md +89 -0
- k_extract-0.1.0/specs/reviews/.gitkeep +0 -0
- k_extract-0.1.0/specs/reviews/task-001.md +7 -0
- k_extract-0.1.0/specs/reviews/task-002.md +22 -0
- k_extract-0.1.0/specs/reviews/task-004.md +5 -0
- k_extract-0.1.0/specs/reviews/task-006.md +11 -0
- k_extract-0.1.0/specs/reviews/task-007.md +13 -0
- k_extract-0.1.0/specs/reviews/task-008.md +11 -0
- k_extract-0.1.0/specs/reviews/task-009.md +11 -0
- k_extract-0.1.0/specs/reviews/task-010.md +16 -0
- k_extract-0.1.0/specs/reviews/task-011.md +7 -0
- k_extract-0.1.0/specs/reviews/task-012.md +27 -0
- k_extract-0.1.0/specs/reviews/task-013.md +5 -0
- k_extract-0.1.0/specs/reviews/task-016.md +7 -0
- k_extract-0.1.0/specs/reviews/task-019.md +7 -0
- k_extract-0.1.0/specs/reviews/task-020.md +15 -0
- k_extract-0.1.0/specs/tasks/.gitkeep +0 -0
- k_extract-0.1.0/specs/tasks/task-001.md +75 -0
- k_extract-0.1.0/specs/tasks/task-002.md +71 -0
- k_extract-0.1.0/specs/tasks/task-003.md +61 -0
- k_extract-0.1.0/specs/tasks/task-004.md +59 -0
- k_extract-0.1.0/specs/tasks/task-005.md +70 -0
- k_extract-0.1.0/specs/tasks/task-006.md +75 -0
- k_extract-0.1.0/specs/tasks/task-007.md +79 -0
- k_extract-0.1.0/specs/tasks/task-008.md +63 -0
- k_extract-0.1.0/specs/tasks/task-009.md +65 -0
- k_extract-0.1.0/specs/tasks/task-010.md +82 -0
- k_extract-0.1.0/specs/tasks/task-011.md +72 -0
- k_extract-0.1.0/specs/tasks/task-012.md +84 -0
- k_extract-0.1.0/specs/tasks/task-013.md +58 -0
- k_extract-0.1.0/specs/tasks/task-014.md +52 -0
- k_extract-0.1.0/specs/tasks/task-015.md +64 -0
- k_extract-0.1.0/specs/tasks/task-016.md +66 -0
- k_extract-0.1.0/specs/tasks/task-017.md +63 -0
- k_extract-0.1.0/specs/tasks/task-018.md +74 -0
- k_extract-0.1.0/specs/tasks/task-019.md +75 -0
- k_extract-0.1.0/specs/tasks/task-020.md +91 -0
- k_extract-0.1.0/specs/tasks/task-021.md +39 -0
- k_extract-0.1.0/src/k_extract/__init__.py +0 -0
- k_extract-0.1.0/src/k_extract/cli/__init__.py +15 -0
- k_extract-0.1.0/src/k_extract/cli/display.py +109 -0
- k_extract-0.1.0/src/k_extract/cli/init.py +677 -0
- k_extract-0.1.0/src/k_extract/cli/jobs.py +191 -0
- k_extract-0.1.0/src/k_extract/cli/run.py +115 -0
- k_extract-0.1.0/src/k_extract/config/__init__.py +0 -0
- k_extract-0.1.0/src/k_extract/config/loader.py +67 -0
- k_extract-0.1.0/src/k_extract/config/schema.py +129 -0
- k_extract-0.1.0/src/k_extract/config/settings.py +49 -0
- k_extract-0.1.0/src/k_extract/domain/__init__.py +0 -0
- k_extract-0.1.0/src/k_extract/domain/entities.py +62 -0
- k_extract-0.1.0/src/k_extract/domain/mutations.py +155 -0
- k_extract-0.1.0/src/k_extract/domain/ontology.py +391 -0
- k_extract-0.1.0/src/k_extract/domain/relationships.py +70 -0
- k_extract-0.1.0/src/k_extract/extraction/__init__.py +0 -0
- k_extract-0.1.0/src/k_extract/extraction/agent.py +447 -0
- k_extract-0.1.0/src/k_extract/extraction/hooks.py +164 -0
- k_extract-0.1.0/src/k_extract/extraction/logging.py +93 -0
- k_extract-0.1.0/src/k_extract/extraction/models.py +76 -0
- k_extract-0.1.0/src/k_extract/extraction/prompts.py +181 -0
- k_extract-0.1.0/src/k_extract/extraction/store.py +845 -0
- k_extract-0.1.0/src/k_extract/extraction/templates/job_description.txt +7 -0
- k_extract-0.1.0/src/k_extract/extraction/templates/system_prompt.txt +58 -0
- k_extract-0.1.0/src/k_extract/extraction/tools.py +733 -0
- k_extract-0.1.0/src/k_extract/pipeline/__init__.py +0 -0
- k_extract-0.1.0/src/k_extract/pipeline/database.py +97 -0
- k_extract-0.1.0/src/k_extract/pipeline/defines.py +130 -0
- k_extract-0.1.0/src/k_extract/pipeline/fingerprint.py +212 -0
- k_extract-0.1.0/src/k_extract/pipeline/jobs.py +344 -0
- k_extract-0.1.0/src/k_extract/pipeline/orchestrator.py +585 -0
- k_extract-0.1.0/src/k_extract/pipeline/progress.py +208 -0
- k_extract-0.1.0/src/k_extract/pipeline/sources.py +318 -0
- k_extract-0.1.0/src/k_extract/pipeline/worker.py +210 -0
- k_extract-0.1.0/src/k_extract/pipeline/writer.py +56 -0
- k_extract-0.1.0/tests/__init__.py +0 -0
- k_extract-0.1.0/tests/cli/__init__.py +0 -0
- k_extract-0.1.0/tests/cli/test_cli.py +10 -0
- k_extract-0.1.0/tests/cli/test_display.py +176 -0
- k_extract-0.1.0/tests/cli/test_init.py +564 -0
- k_extract-0.1.0/tests/cli/test_jobs.py +649 -0
- k_extract-0.1.0/tests/config/__init__.py +0 -0
- k_extract-0.1.0/tests/config/test_loader.py +191 -0
- k_extract-0.1.0/tests/config/test_schema.py +404 -0
- k_extract-0.1.0/tests/config/test_settings.py +69 -0
- k_extract-0.1.0/tests/domain/__init__.py +0 -0
- k_extract-0.1.0/tests/domain/test_entities.py +111 -0
- k_extract-0.1.0/tests/domain/test_mutations.py +342 -0
- k_extract-0.1.0/tests/domain/test_ontology.py +924 -0
- k_extract-0.1.0/tests/domain/test_relationships.py +161 -0
- k_extract-0.1.0/tests/e2e/__init__.py +0 -0
- k_extract-0.1.0/tests/e2e/test_full_pipeline.py +259 -0
- k_extract-0.1.0/tests/extraction/__init__.py +0 -0
- k_extract-0.1.0/tests/extraction/templates/__init__.py +0 -0
- k_extract-0.1.0/tests/extraction/test_agent.py +1325 -0
- k_extract-0.1.0/tests/extraction/test_prompts.py +378 -0
- k_extract-0.1.0/tests/extraction/test_store.py +930 -0
- k_extract-0.1.0/tests/extraction/test_tools.py +1793 -0
- k_extract-0.1.0/tests/pipeline/__init__.py +0 -0
- k_extract-0.1.0/tests/pipeline/test_database.py +157 -0
- k_extract-0.1.0/tests/pipeline/test_defines.py +132 -0
- k_extract-0.1.0/tests/pipeline/test_fingerprint.py +272 -0
- k_extract-0.1.0/tests/pipeline/test_jobs.py +657 -0
- k_extract-0.1.0/tests/pipeline/test_orchestrator.py +776 -0
- k_extract-0.1.0/tests/pipeline/test_progress.py +316 -0
- k_extract-0.1.0/tests/pipeline/test_sources.py +395 -0
- k_extract-0.1.0/tests/pipeline/test_worker.py +734 -0
- k_extract-0.1.0/tests/pipeline/test_writer.py +179 -0
- k_extract-0.1.0/uv.lock +1043 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
branches: [main]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
lint-and-test:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
steps:
|
|
11
|
+
- uses: actions/checkout@v4
|
|
12
|
+
|
|
13
|
+
- name: Install uv
|
|
14
|
+
uses: astral-sh/setup-uv@v5
|
|
15
|
+
|
|
16
|
+
- name: Set up Python
|
|
17
|
+
run: uv python install 3.12
|
|
18
|
+
|
|
19
|
+
- name: Install dependencies
|
|
20
|
+
run: uv sync --dev
|
|
21
|
+
|
|
22
|
+
- name: Lint
|
|
23
|
+
run: uv run ruff check src/ tests/
|
|
24
|
+
|
|
25
|
+
- name: Format check
|
|
26
|
+
run: uv run ruff format --check src/ tests/
|
|
27
|
+
|
|
28
|
+
- name: Type check
|
|
29
|
+
run: uv run pyright
|
|
30
|
+
|
|
31
|
+
- name: Test
|
|
32
|
+
run: uv run pytest
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
# -----------------------------------------------------------
|
|
12
|
+
# Job 1: Determine version, tag, and create GitHub Release
|
|
13
|
+
# -----------------------------------------------------------
|
|
14
|
+
release:
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
if: github.repository == 'jsell-rh/k-extract'
|
|
17
|
+
|
|
18
|
+
permissions:
|
|
19
|
+
contents: write
|
|
20
|
+
id-token: write
|
|
21
|
+
|
|
22
|
+
outputs:
|
|
23
|
+
released: ${{ steps.semrel.outputs.released }}
|
|
24
|
+
version: ${{ steps.semrel.outputs.version }}
|
|
25
|
+
tag: ${{ steps.semrel.outputs.tag }}
|
|
26
|
+
|
|
27
|
+
steps:
|
|
28
|
+
- name: Checkout
|
|
29
|
+
uses: actions/checkout@v4
|
|
30
|
+
with:
|
|
31
|
+
fetch-depth: 0
|
|
32
|
+
persist-credentials: false
|
|
33
|
+
|
|
34
|
+
- name: Configure Git credentials
|
|
35
|
+
run: |
|
|
36
|
+
git config user.name "github-actions[bot]"
|
|
37
|
+
git config user.email "github-actions[bot]@users.noreply.github.com"
|
|
38
|
+
|
|
39
|
+
- name: Python Semantic Release
|
|
40
|
+
id: semrel
|
|
41
|
+
uses: python-semantic-release/python-semantic-release@v10
|
|
42
|
+
with:
|
|
43
|
+
github_token: ${{ secrets.GITHUB_TOKEN }}
|
|
44
|
+
root_options: "-vv"
|
|
45
|
+
|
|
46
|
+
# -----------------------------------------------------------
|
|
47
|
+
# Job 2: Build distribution artifacts
|
|
48
|
+
# -----------------------------------------------------------
|
|
49
|
+
build:
|
|
50
|
+
needs: release
|
|
51
|
+
if: needs.release.outputs.released == 'true'
|
|
52
|
+
runs-on: ubuntu-latest
|
|
53
|
+
|
|
54
|
+
steps:
|
|
55
|
+
- name: Checkout released tag
|
|
56
|
+
uses: actions/checkout@v4
|
|
57
|
+
with:
|
|
58
|
+
ref: ${{ needs.release.outputs.tag }}
|
|
59
|
+
|
|
60
|
+
- name: Install uv
|
|
61
|
+
uses: astral-sh/setup-uv@v5
|
|
62
|
+
|
|
63
|
+
- name: Set up Python
|
|
64
|
+
run: uv python install 3.12
|
|
65
|
+
|
|
66
|
+
- name: Build package
|
|
67
|
+
run: uv build
|
|
68
|
+
|
|
69
|
+
- name: Upload dist artifacts
|
|
70
|
+
uses: actions/upload-artifact@v4
|
|
71
|
+
with:
|
|
72
|
+
name: dist
|
|
73
|
+
path: dist/
|
|
74
|
+
if-no-files-found: error
|
|
75
|
+
|
|
76
|
+
# -----------------------------------------------------------
|
|
77
|
+
# Job 3: Publish to PyPI using OIDC Trusted Publishing
|
|
78
|
+
# -----------------------------------------------------------
|
|
79
|
+
publish-pypi:
|
|
80
|
+
needs: [release, build]
|
|
81
|
+
if: needs.release.outputs.released == 'true'
|
|
82
|
+
runs-on: ubuntu-latest
|
|
83
|
+
|
|
84
|
+
environment:
|
|
85
|
+
name: pypi
|
|
86
|
+
url: https://pypi.org/project/k-extract/${{ needs.release.outputs.version }}/
|
|
87
|
+
|
|
88
|
+
permissions:
|
|
89
|
+
id-token: write
|
|
90
|
+
|
|
91
|
+
steps:
|
|
92
|
+
- name: Download dist artifacts
|
|
93
|
+
uses: actions/download-artifact@v4
|
|
94
|
+
with:
|
|
95
|
+
name: dist
|
|
96
|
+
path: dist/
|
|
97
|
+
|
|
98
|
+
- name: Publish to PyPI
|
|
99
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
100
|
+
|
|
101
|
+
# -----------------------------------------------------------
|
|
102
|
+
# Job 4: Upload artifacts to the GitHub Release
|
|
103
|
+
# -----------------------------------------------------------
|
|
104
|
+
publish-github-release:
|
|
105
|
+
needs: [release, build]
|
|
106
|
+
if: needs.release.outputs.released == 'true'
|
|
107
|
+
runs-on: ubuntu-latest
|
|
108
|
+
|
|
109
|
+
permissions:
|
|
110
|
+
contents: write
|
|
111
|
+
id-token: write
|
|
112
|
+
|
|
113
|
+
steps:
|
|
114
|
+
- name: Download dist artifacts
|
|
115
|
+
uses: actions/download-artifact@v4
|
|
116
|
+
with:
|
|
117
|
+
name: dist
|
|
118
|
+
path: dist/
|
|
119
|
+
|
|
120
|
+
- name: Upload to GitHub Release
|
|
121
|
+
uses: python-semantic-release/publish-action@v10
|
|
122
|
+
with:
|
|
123
|
+
github_token: ${{ secrets.GITHUB_TOKEN }}
|
|
124
|
+
tag: ${{ needs.release.outputs.tag }}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.11.6
|
|
4
|
+
hooks:
|
|
5
|
+
- id: ruff
|
|
6
|
+
args: [--fix]
|
|
7
|
+
- id: ruff-format
|
|
8
|
+
- repo: local
|
|
9
|
+
hooks:
|
|
10
|
+
- id: pyright
|
|
11
|
+
name: pyright
|
|
12
|
+
entry: uv run pyright
|
|
13
|
+
language: system
|
|
14
|
+
types: [python]
|
|
15
|
+
pass_filenames: false
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
k_extract-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: k-extract
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: General-purpose knowledge graph extraction framework
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Requires-Dist: claude-agent-sdk>=0.1.56
|
|
7
|
+
Requires-Dist: click>=8.1
|
|
8
|
+
Requires-Dist: pathspec>=0.12
|
|
9
|
+
Requires-Dist: pydantic-settings>=2.0
|
|
10
|
+
Requires-Dist: pyyaml>=6.0
|
|
11
|
+
Requires-Dist: rich>=13.0
|
|
12
|
+
Requires-Dist: sqlalchemy>=2.0
|
|
13
|
+
Requires-Dist: structlog>=24.0
|
|
File without changes
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "k-extract"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "General-purpose knowledge graph extraction framework"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.12"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"claude-agent-sdk>=0.1.56",
|
|
9
|
+
"click>=8.1",
|
|
10
|
+
"rich>=13.0",
|
|
11
|
+
"sqlalchemy>=2.0",
|
|
12
|
+
"pydantic-settings>=2.0",
|
|
13
|
+
"structlog>=24.0",
|
|
14
|
+
"pyyaml>=6.0",
|
|
15
|
+
"pathspec>=0.12",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[dependency-groups]
|
|
19
|
+
dev = [
|
|
20
|
+
"pytest>=8.0",
|
|
21
|
+
"ruff>=0.4",
|
|
22
|
+
"pyright>=1.1",
|
|
23
|
+
"pre-commit>=3.0",
|
|
24
|
+
"pytest-asyncio>=1.3.0",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.scripts]
|
|
28
|
+
k-extract = "k_extract.cli:main"
|
|
29
|
+
|
|
30
|
+
[build-system]
|
|
31
|
+
requires = ["hatchling"]
|
|
32
|
+
build-backend = "hatchling.build"
|
|
33
|
+
|
|
34
|
+
[tool.hatch.build.targets.wheel]
|
|
35
|
+
packages = ["src/k_extract"]
|
|
36
|
+
|
|
37
|
+
[tool.pytest.ini_options]
|
|
38
|
+
markers = [
|
|
39
|
+
"e2e: end-to-end integration tests requiring a real Claude API key (deselected by default)",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[tool.ruff]
|
|
43
|
+
src = ["src"]
|
|
44
|
+
target-version = "py312"
|
|
45
|
+
|
|
46
|
+
[tool.ruff.lint]
|
|
47
|
+
select = ["E", "F", "I", "UP", "B", "SIM"]
|
|
48
|
+
|
|
49
|
+
[tool.pyright]
|
|
50
|
+
pythonVersion = "3.12"
|
|
51
|
+
venvPath = "."
|
|
52
|
+
venv = ".venv"
|
|
53
|
+
include = ["src"]
|
|
54
|
+
typeCheckingMode = "standard"
|
|
55
|
+
|
|
56
|
+
# ---------------------------------------------------------------------------
|
|
57
|
+
# Semantic Release
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
[tool.semantic_release]
|
|
60
|
+
commit_parser = "conventional"
|
|
61
|
+
version_toml = ["pyproject.toml:project.version"]
|
|
62
|
+
allow_zero_version = true
|
|
63
|
+
major_on_zero = false
|
|
64
|
+
|
|
65
|
+
[tool.semantic_release.branches.main]
|
|
66
|
+
match = "main"
|
|
67
|
+
|
|
68
|
+
[tool.semantic_release.publish]
|
|
69
|
+
upload_to_vcs_release = false
|
|
70
|
+
|
|
71
|
+
[tool.semantic_release.commit_author]
|
|
72
|
+
env = "GIT_COMMIT_AUTHOR"
|
|
73
|
+
default = "github-actions[bot] <github-actions[bot]@users.noreply.github.com>"
|
|
74
|
+
|
|
75
|
+
[tool.semantic_release.commit_parser_options]
|
|
76
|
+
minor_tags = ["feat"]
|
|
77
|
+
patch_tags = ["fix", "perf"]
|
|
78
|
+
parse_squash_commits = true
|
|
79
|
+
ignore_merge_commits = true
|
|
80
|
+
|
|
81
|
+
[tool.semantic_release.changelog]
|
|
82
|
+
exclude_commit_patterns = [
|
|
83
|
+
'''chore(?:\([^)]*?\))?: .+''',
|
|
84
|
+
'''ci(?:\([^)]*?\))?: .+''',
|
|
85
|
+
'''refactor(?:\([^)]*?\))?: .+''',
|
|
86
|
+
'''style(?:\([^)]*?\))?: .+''',
|
|
87
|
+
'''test(?:\([^)]*?\))?: .+''',
|
|
88
|
+
'''docs(?:\([^)]*?\))?: .+''',
|
|
89
|
+
'''build(?:\([^)]*?\))?: .+''',
|
|
90
|
+
'''review(?:\([^)]*?\))?: .+''',
|
|
91
|
+
]
|
|
92
|
+
|
|
93
|
+
[tool.semantic_release.changelog.default_templates]
|
|
94
|
+
changelog_file = "CHANGELOG.md"
|
|
95
|
+
output_format = "md"
|
|
96
|
+
|
|
97
|
+
[tool.semantic_release.remote]
|
|
98
|
+
type = "github"
|
|
99
|
+
token = { env = "GH_TOKEN" }
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Runs the four agent prompts (project manager, implementation, verifier,
# process revision) back to back, repeating until the script is interrupted.
# The prompt paths are relative, so start it from the repository root.

# Without the CLI every command would fail instantly and the loop below would
# spin forever printing "command not found".
if ! command -v claude >/dev/null 2>&1; then
  echo "loop.sh: 'claude' CLI not found on PATH" >&2
  exit 127
fi

# Quoted on purpose: unquoted, opus[1m] is a glob pattern and would be replaced
# by a file named "opus1" or "opusm" if one existed in the working directory.
model='opus[1m]'

while true; do
  for prompt in project-manager implementation verifier process-revision; do
    claude --model "$model" --dangerously-skip-permissions < "specs/prompts/${prompt}.md"
  done
done
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# k-extract Build Stats — tracks velocity, quality, and cost metrics across tasks.
|
|
3
|
+
# Usage: ./scripts/stats.sh [--json]
|
|
4
|
+
set -uo pipefail
|
|
5
|
+
|
|
6
|
+
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
|
|
7
|
+
TASKS_DIR="$REPO_ROOT/specs/tasks"
|
|
8
|
+
REVIEWS_DIR="$REPO_ROOT/specs/reviews"
|
|
9
|
+
JSON_MODE=false
|
|
10
|
+
[[ "${1:-}" == "--json" ]] && JSON_MODE=true
|
|
11
|
+
|
|
12
|
+
# --- Colors (disabled for JSON or non-tty) ---
|
|
13
|
+
if $JSON_MODE; then
|
|
14
|
+
BOLD="" DIM="" RESET="" GREEN="" YELLOW="" RED="" CYAN="" BLUE="" MAGENTA=""
|
|
15
|
+
else
|
|
16
|
+
BOLD=$'\033[1m' DIM=$'\033[2m' RESET=$'\033[0m'
|
|
17
|
+
GREEN=$'\033[32m' YELLOW=$'\033[33m' RED=$'\033[31m'
|
|
18
|
+
CYAN=$'\033[36m' BLUE=$'\033[34m' MAGENTA=$'\033[35m'
|
|
19
|
+
fi
|
|
20
|
+
|
|
21
|
+
# --- Task status counts ---
|
|
22
|
+
total_tasks=0; complete=0; in_review=0; in_progress=0; not_started=0
|
|
23
|
+
declare -a task_names=() task_statuses=() task_titles=()
|
|
24
|
+
|
|
25
|
+
for f in "$TASKS_DIR"/task-*.md; do
|
|
26
|
+
[[ -f "$f" ]] || continue
|
|
27
|
+
total_tasks=$((total_tasks + 1))
|
|
28
|
+
name=$(basename "$f" .md)
|
|
29
|
+
status=$(grep -oP '(?<=\*\*(Status|Progress):\*\* `)[^`]+' "$f" 2>/dev/null | head -1 || echo "unknown")
|
|
30
|
+
title=$(head -1 "$f" | sed -E 's/^# (TASK-[0-9]+|Task [0-9]+): //')
|
|
31
|
+
# Replace em-dashes with plain dashes for consistent column width
|
|
32
|
+
title="${title//—/-}"
|
|
33
|
+
# Truncate to 38 chars with ellipsis in the middle
|
|
34
|
+
if [[ ${#title} -gt 38 ]]; then
|
|
35
|
+
title="${title:0:18}..${title: -18}"
|
|
36
|
+
fi
|
|
37
|
+
task_names+=("$name")
|
|
38
|
+
task_statuses+=("$status")
|
|
39
|
+
task_titles+=("$title")
|
|
40
|
+
case "$status" in
|
|
41
|
+
complete) complete=$((complete + 1)) ;;
|
|
42
|
+
ready-for-review|in-review) in_review=$((in_review + 1)) ;;
|
|
43
|
+
in-progress) in_progress=$((in_progress + 1)) ;;
|
|
44
|
+
not-started) not_started=$((not_started + 1)) ;;
|
|
45
|
+
esac
|
|
46
|
+
done
|
|
47
|
+
|
|
48
|
+
# --- Review metrics per task ---
|
|
49
|
+
declare -A review_rounds=() review_findings=()
|
|
50
|
+
|
|
51
|
+
for f in "$REVIEWS_DIR"/task-*.md; do
|
|
52
|
+
[[ -f "$f" ]] || continue
|
|
53
|
+
name=$(basename "$f" .md)
|
|
54
|
+
round_headers=$(grep -c '^## Round [0-9]' "$f" 2>/dev/null || true)
|
|
55
|
+
rn_headers=$(grep -c '^## R[0-9]' "$f" 2>/dev/null || true)
|
|
56
|
+
findings_headers=$(grep -c '^## Findings' "$f" 2>/dev/null || true)
|
|
57
|
+
rounds=$((${round_headers:-0} + ${rn_headers:-0} + ${findings_headers:-0}))
|
|
58
|
+
findings=$(grep -c 'process-revision-complete' "$f" 2>/dev/null || true)
|
|
59
|
+
findings=${findings:-0}
|
|
60
|
+
review_rounds[$name]=$((rounds))
|
|
61
|
+
review_findings[$name]=$((findings))
|
|
62
|
+
done
|
|
63
|
+
|
|
64
|
+
# --- Wall clock time per task (from git history) ---
|
|
65
|
+
OUTLIER_GAP=3600 # 1 hour in seconds
|
|
66
|
+
|
|
67
|
+
declare -A task_wall_clock=() task_first_commit=() task_last_commit=()
|
|
68
|
+
declare -A task_active_seconds=()
|
|
69
|
+
declare -A task_commits=()
|
|
70
|
+
|
|
71
|
+
# Derive timing metrics for one task from the commits whose message mentions it.
# Arguments: $1 - task id used as the `git log --grep` pattern (e.g. task-001)
# Globals:   REPO_ROOT, OUTLIER_GAP (read)
#            task_first_commit, task_last_commit, task_wall_clock,
#            task_active_seconds, task_commits (written, keyed by task id)
# Nothing is recorded when fewer than two matching commits exist.
compute_task_time() {
  local task_id="$1"
  local commits
  commits=$(cd "$REPO_ROOT" && git log --all --format="%at" --grep="$task_id" --reverse 2>/dev/null)
  if [[ -z "$commits" ]]; then
    return
  fi

  local earliest latest previous delta busy=0 seen=0
  while IFS= read -r ts; do
    if [[ -n "$ts" ]]; then
      if (( seen == 0 )); then
        earliest=$ts
      else
        delta=$(( ts - previous ))
        # Only positive gaps shorter than OUTLIER_GAP count as active time;
        # longer pauses and out-of-order timestamps are ignored.
        if [[ $delta -gt 0 ]] && [[ $delta -lt $OUTLIER_GAP ]]; then
          busy=$(( busy + delta ))
        fi
      fi
      latest=$ts
      previous=$ts
      seen=$(( seen + 1 ))
    fi
  done <<< "$commits"

  # A single commit has no interval to measure, so it leaves no entry.
  if (( seen >= 2 )); then
    task_first_commit[$task_id]=$earliest
    task_last_commit[$task_id]=$latest
    task_wall_clock[$task_id]=$(( latest - earliest ))
    task_active_seconds[$task_id]=$busy
    task_commits[$task_id]=$seen
  fi
}
|
|
101
|
+
|
|
102
|
+
for name in "${task_names[@]}"; do
|
|
103
|
+
compute_task_time "$name"
|
|
104
|
+
done
|
|
105
|
+
|
|
106
|
+
# --- Code metrics ---

# Print the combined line count of the *.py files under a directory.
# Arguments: $1   - directory to scan; prints 0 when it is not a directory
#            $2.. - optional extra find(1) predicates narrowing the file set
# Outputs:   a single integer on stdout
# .git and .venv directories are pruned so repository internals and installed
# packages are never counted.
sum_py_lines() {
  local root=$1
  shift
  if [[ ! -d "$root" ]]; then
    echo 0
    return 0
  fi
  local -a extra=()
  if (( $# > 0 )); then
    # Group the caller's predicates so an embedded -o cannot escape the
    # implicit -a chain it is spliced into.
    extra=('(' "$@" ')')
  fi
  # Concatenating the files and counting newlines yields the same sum as adding
  # up per-file `wc -l` results, without having to parse a "total" row.
  find "$root" \
    '(' -name .git -o -name .venv ')' -prune -o \
    -type f -name '*.py' ${extra[@]+"${extra[@]}"} -exec cat {} + 2>/dev/null \
    | wc -l | awk '{print $1}'
}

# Production code is everything under src/ that is not named like a test;
# test code is any test_*.py / *_test.py file in the repository. The total is
# the sum of the two, so the production figure cannot go negative and the test
# ratio stays within 0-100%.
prod_py_lines=$(sum_py_lines "$REPO_ROOT/src" -not '(' -name 'test_*.py' -o -name '*_test.py' ')')
prod_py_lines=${prod_py_lines:-0}
test_py_lines=$(sum_py_lines "$REPO_ROOT" -name 'test_*.py' -o -name '*_test.py')
test_py_lines=${test_py_lines:-0}
total_py_lines=$((prod_py_lines + test_py_lines))

total_commits=$(cd "$REPO_ROOT" && git log --all --oneline | wc -l)

# Overall timeline: seconds between the first and the last commit listed by git log --all
first_commit_ts=$(cd "$REPO_ROOT" && git log --all --format="%at" --reverse | head -1)
last_commit_ts=$(cd "$REPO_ROOT" && git log --all --format="%at" | head -1)
total_wall_seconds=$((last_commit_ts - first_commit_ts))

# Checklist items (process learning): commits whose message matches fix(process)
checklist_items=$(cd "$REPO_ROOT" && git log --all --oneline --grep="fix(process)" | wc -l)
|
|
122
|
+
|
|
123
|
+
# --- Format helpers ---
|
|
124
|
+
# Render a number of seconds as "Hh Mm Ss", dropping the hour field when the
# duration is under one hour.
# Arguments: $1 - duration in whole seconds
# Outputs:   the formatted duration on stdout, without a trailing newline
fmt_duration() {
  local total=$1
  local hours=$(( total / 3600 ))
  local minutes=$(( total % 3600 / 60 ))
  local seconds=$(( total % 60 ))

  if (( hours <= 0 )); then
    printf "%dm %ds" "$minutes" "$seconds"
    return
  fi
  printf "%dh %dm %ds" "$hours" "$minutes" "$seconds"
}
|
|
135
|
+
|
|
136
|
+
# Print one bar segment: a glyph repeated a given number of times.
# Arguments: $1 - repeat count (nothing is printed when <= 0), $2 - glyph
_progress_bar_fill() {
  local count=$1 glyph=$2 pad=""
  (( count > 0 )) || return 0
  printf -v pad '%*s' "$count" ''
  printf '%s' "${pad// /$glyph}"
}

# Draw a 30-character progress bar split into complete / in-review /
# in-progress / not-started segments.
# Arguments: $1 - total number of tasks
#            $2 - completed tasks
#            $3 - tasks in review
#            $4 - tasks in progress
#            $5 - tasks not started (accepted for symmetry; the not-started
#                 segment is whatever width the other three leave over, so
#                 the bar is always exactly 30 characters wide)
# Globals:   GREEN, MAGENTA, BLUE, DIM, RESET (read; may be empty)
# Outputs:   the bar on stdout, without a trailing newline
progress_bar() {
  local total=$1 n_complete=$2 n_review=$3 n_progress=$4 n_not_started=${5:-0}
  local width=30
  local w_complete=0 w_review=0 w_progress=0

  # With no tasks there is nothing to scale by; render an all-"not started"
  # bar instead of dividing by zero.
  if (( total > 0 )); then
    w_complete=$(( n_complete * width / total ))
    w_review=$(( n_review * width / total ))
    w_progress=$(( n_progress * width / total ))
  fi
  local w_not_started=$(( width - w_complete - w_review - w_progress ))

  printf '%s' "${GREEN-}"
  _progress_bar_fill "$w_complete" "#"
  printf '%s' "${MAGENTA-}"
  _progress_bar_fill "$w_review" "#"
  printf '%s' "${BLUE-}"
  _progress_bar_fill "$w_progress" "#"
  printf '%s' "${DIM-}"
  _progress_bar_fill "$w_not_started" "-"
  printf '%s' "${RESET-}"
}
|
|
157
|
+
|
|
158
|
+
# --- JSON output ---
|
|
159
|
+
if $JSON_MODE; then
  # Machine-readable report: one JSON object with a "summary" section and a
  # "tasks" array. The script exits afterwards so the human-readable report
  # below is skipped.

  # Guard the percentage: with no task files total_tasks is 0 and the division
  # would abort mid-document, leaving invalid JSON on stdout.
  progress_pct=0
  if [[ $total_tasks -gt 0 ]]; then
    progress_pct=$((complete * 100 / total_tasks))
  fi

  echo "{"
  echo "  \"summary\": {"
  echo "    \"total_tasks\": $total_tasks,"
  echo "    \"complete\": $complete,"
  echo "    \"in_review\": $in_review,"
  echo "    \"in_progress\": $in_progress,"
  echo "    \"not_started\": $not_started,"
  echo "    \"progress_pct\": $progress_pct,"
  echo "    \"total_commits\": $total_commits,"
  echo "    \"prod_lines\": $prod_py_lines,"
  echo "    \"test_lines\": $test_py_lines,"
  if [[ $total_py_lines -gt 0 ]]; then
    echo "    \"test_ratio\": \"$(printf '%.1f' "$(echo "scale=1; $test_py_lines * 100 / $total_py_lines" | bc)")%\","
  else
    echo "    \"test_ratio\": \"0.0%\","
  fi
  echo "    \"total_wall_clock_seconds\": $total_wall_seconds,"
  echo "    \"checklist_items\": $checklist_items"
  echo "  },"
  echo "  \"tasks\": ["
  # `first` suppresses the separator before the first element only, so the
  # array never ends with a trailing comma.
  first=true
  for i in "${!task_names[@]}"; do
    name="${task_names[$i]}"
    status="${task_statuses[$i]}"
    # Tasks without a review file or without matching commits report zeros.
    rounds=${review_rounds[$name]:-0}
    findings=${review_findings[$name]:-0}
    active=${task_active_seconds[$name]:-0}
    wall=${task_wall_clock[$name]:-0}
    $first || echo ","
    first=false
    commits=${task_commits[$name]:-0}
    printf '    {"task": "%s", "status": "%s", "commits": %d, "review_rounds": %d, "findings": %d, "active_seconds": %d, "wall_seconds": %d}' \
      "$name" "$status" "$commits" "$rounds" "$findings" "$active" "$wall"
  done
  echo ""
  echo "  ]"
  echo "}"
  exit 0
fi
|
|
199
|
+
|
|
200
|
+
# --- Human output ---
|
|
201
|
+
echo ""
|
|
202
|
+
echo "${BOLD}k-extract Build Stats${RESET}"
|
|
203
|
+
echo "${DIM}$(date '+%Y-%m-%d %H:%M')${RESET}"
|
|
204
|
+
echo ""
|
|
205
|
+
|
|
206
|
+
# Progress
|
|
207
|
+
if [[ $total_tasks -gt 0 ]]; then
|
|
208
|
+
pct=$((complete * 100 / total_tasks))
|
|
209
|
+
echo "${BOLD}Progress${RESET}"
|
|
210
|
+
printf " [$(progress_bar $total_tasks $complete $in_review $in_progress $not_started)] %d%% (%d/%d tasks)\n" "$pct" "$complete" "$total_tasks"
|
|
211
|
+
echo " ${GREEN}$complete complete${RESET} ${MAGENTA}$in_review in review${RESET} ${BLUE}$in_progress in progress${RESET} ${DIM}$not_started not started${RESET}"
|
|
212
|
+
else
|
|
213
|
+
echo "${BOLD}Progress${RESET}"
|
|
214
|
+
echo " ${DIM}No tasks found in $TASKS_DIR${RESET}"
|
|
215
|
+
fi
|
|
216
|
+
echo ""
|
|
217
|
+
|
|
218
|
+
# Timeline
|
|
219
|
+
echo "${BOLD}Timeline${RESET}"
|
|
220
|
+
echo " Total wall clock: $(fmt_duration $total_wall_seconds)"
|
|
221
|
+
echo " Total commits: $total_commits"
|
|
222
|
+
echo " Checklist items: $checklist_items (accumulated process learnings)"
|
|
223
|
+
echo ""
|
|
224
|
+
|
|
225
|
+
# Code
|
|
226
|
+
echo "${BOLD}Code${RESET}"
|
|
227
|
+
echo " Production: $prod_py_lines lines"
|
|
228
|
+
echo " Test: $test_py_lines lines"
|
|
229
|
+
if [[ $total_py_lines -gt 0 ]]; then
|
|
230
|
+
echo " Test ratio: $(printf '%.0f' "$(echo "scale=1; $test_py_lines * 100 / $total_py_lines" | bc)")% of total"
|
|
231
|
+
fi
|
|
232
|
+
if [[ $total_wall_seconds -gt 0 && $prod_py_lines -gt 0 ]]; then
|
|
233
|
+
echo " Throughput: ~$(( prod_py_lines * 3600 / total_wall_seconds )) prod lines/hr"
|
|
234
|
+
fi
|
|
235
|
+
echo ""
|
|
236
|
+
|
|
237
|
+
# Per-task breakdown
|
|
238
|
+
if [[ $total_tasks -gt 0 ]]; then
|
|
239
|
+
echo "${BOLD}Task Breakdown${RESET}"
|
|
240
|
+
printf " ${DIM}%-8s %-38s %-16s %7s %6s %8s %14s${RESET}\n" "TASK" "TITLE" "STATUS" "COMMITS" "ROUNDS" "FINDINGS" "ACTIVE TIME"
|
|
241
|
+
printf " %s%s%s\n" "${DIM}" "------------------------------------------------------------------------------------------------------" "${RESET}"
|
|
242
|
+
|
|
243
|
+
total_rounds=0; total_findings=0; total_active=0; total_task_commits=0
|
|
244
|
+
for i in "${!task_names[@]}"; do
|
|
245
|
+
name="${task_names[$i]}"
|
|
246
|
+
status="${task_statuses[$i]}"
|
|
247
|
+
rounds=${review_rounds[$name]:-0}
|
|
248
|
+
findings=${review_findings[$name]:-0}
|
|
249
|
+
active=${task_active_seconds[$name]:-0}
|
|
250
|
+
commits=${task_commits[$name]:-0}
|
|
251
|
+
|
|
252
|
+
total_rounds=$((total_rounds + rounds))
|
|
253
|
+
total_findings=$((total_findings + findings))
|
|
254
|
+
total_active=$((total_active + active))
|
|
255
|
+
total_task_commits=$((total_task_commits + commits))
|
|
256
|
+
|
|
257
|
+
case "$status" in
|
|
258
|
+
complete) status_label="complete"; status_color="$GREEN" ;;
|
|
259
|
+
ready-for-review) status_label="ready-for-review"; status_color="$MAGENTA" ;;
|
|
260
|
+
needs-revision) status_label="needs-revision"; status_color="$YELLOW" ;;
|
|
261
|
+
in-review) status_label="in-review"; status_color="$YELLOW" ;;
|
|
262
|
+
in-progress) status_label="in-progress"; status_color="$BLUE" ;;
|
|
263
|
+
not-started) status_label="not-started"; status_color="$DIM" ;;
|
|
264
|
+
*) status_label="$status"; status_color="" ;;
|
|
265
|
+
esac
|
|
266
|
+
sc="${status_color}$(printf '%-16s' "$status_label")${RESET}"
|
|
267
|
+
|
|
268
|
+
rpad=$(printf "%6d" "$rounds")
|
|
269
|
+
if [[ $rounds -eq 0 ]]; then
|
|
270
|
+
rc="${DIM}${rpad}${RESET}"
|
|
271
|
+
elif [[ $rounds -le 3 ]]; then
|
|
272
|
+
rc="${GREEN}${rpad}${RESET}"
|
|
273
|
+
elif [[ $rounds -le 7 ]]; then
|
|
274
|
+
rc="${YELLOW}${rpad}${RESET}"
|
|
275
|
+
else
|
|
276
|
+
rc="${RED}${rpad}${RESET}"
|
|
277
|
+
fi
|
|
278
|
+
|
|
279
|
+
fpad=$(printf "%8d" "$findings")
|
|
280
|
+
|
|
281
|
+
if [[ $active -gt 0 ]]; then
|
|
282
|
+
at=$(printf "%14s" "$(fmt_duration $active)")
|
|
283
|
+
else
|
|
284
|
+
at="$(printf '%12s' '')${DIM}--${RESET}"
|
|
285
|
+
fi
|
|
286
|
+
|
|
287
|
+
cpad=$(printf "%7d" "$commits")
|
|
288
|
+
title="${task_titles[$i]}"
|
|
289
|
+
printf " %-8s %-38s %s %s %s %s %s\n" "$name" "$title" "$sc" "$cpad" "$rc" "$fpad" "$at"
|
|
290
|
+
done
|
|
291
|
+
|
|
292
|
+
printf " %s%s%s\n" "${DIM}" "------------------------------------------------------------------------------------------------------" "${RESET}"
|
|
293
|
+
printf " ${BOLD}%-8s %-38s %-16s %7d %6d %8d %14s${RESET}\n" "TOTAL" "" "" "$total_task_commits" "$total_rounds" "$total_findings" "$(fmt_duration $total_active)"
|
|
294
|
+
echo ""
|
|
295
|
+
|
|
296
|
+
# Review efficiency
|
|
297
|
+
if [[ $total_findings -gt 0 ]]; then
|
|
298
|
+
echo "${BOLD}Review Efficiency${RESET}"
|
|
299
|
+
echo " Total findings: $total_findings"
|
|
300
|
+
echo " Total review rounds: $total_rounds"
|
|
301
|
+
echo " Findings per round: $(printf '%.1f' "$(echo "scale=1; $total_findings / $total_rounds" | bc)")"
|
|
302
|
+
if [[ $complete -gt 0 ]]; then
|
|
303
|
+
echo " Avg rounds per task: $(printf '%.1f' "$(echo "scale=1; $total_rounds / $complete" | bc)") (completed tasks only)"
|
|
304
|
+
fi
|
|
305
|
+
echo ""
|
|
306
|
+
|
|
307
|
+
echo " ${DIM}Highest review effort:${RESET}"
|
|
308
|
+
for name in "${task_names[@]}"; do
|
|
309
|
+
r=${review_rounds[$name]:-0}
|
|
310
|
+
f=${review_findings[$name]:-0}
|
|
311
|
+
[[ $r -gt 0 ]] && echo " $r rounds, $f findings $name"
|
|
312
|
+
done | sort -rn | head -3
|
|
313
|
+
echo ""
|
|
314
|
+
fi
|
|
315
|
+
fi
|