mnemebrain-benchmark 0.1.0a1 (mnemebrain_benchmark-0.1.0a1.tar.gz)
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- mnemebrain_benchmark-0.1.0a1/.github/ISSUE_TEMPLATE/bug_report.yml +66 -0
- mnemebrain_benchmark-0.1.0a1/.github/ISSUE_TEMPLATE/config.yml +2 -0
- mnemebrain_benchmark-0.1.0a1/.github/ISSUE_TEMPLATE/feature_request.yml +44 -0
- mnemebrain_benchmark-0.1.0a1/.github/ISSUE_TEMPLATE/new_adapter.yml +61 -0
- mnemebrain_benchmark-0.1.0a1/.github/workflows/ci.yml +84 -0
- mnemebrain_benchmark-0.1.0a1/.github/workflows/codeql.yml +54 -0
- mnemebrain_benchmark-0.1.0a1/.github/workflows/dependency-review.yml +19 -0
- mnemebrain_benchmark-0.1.0a1/.github/workflows/pylint.yml +31 -0
- mnemebrain_benchmark-0.1.0a1/.github/workflows/release.yml +96 -0
- mnemebrain_benchmark-0.1.0a1/.gitignore +19 -0
- mnemebrain_benchmark-0.1.0a1/BMB_REPORT.md +417 -0
- mnemebrain_benchmark-0.1.0a1/CONTRIBUTING.md +353 -0
- mnemebrain_benchmark-0.1.0a1/LICENSE +21 -0
- mnemebrain_benchmark-0.1.0a1/PKG-INFO +26 -0
- mnemebrain_benchmark-0.1.0a1/README.md +128 -0
- mnemebrain_benchmark-0.1.0a1/docs/adding-adapters.md +211 -0
- mnemebrain_benchmark-0.1.0a1/docs/architecture.md +118 -0
- mnemebrain_benchmark-0.1.0a1/pyproject.toml +46 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/__init__.py +1 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/__main__.py +4 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/adapters/__init__.py +1 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/adapters/langchain_buffer.py +55 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/adapters/mem0_adapter.py +181 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/adapters/mnemebrain_adapter.py +216 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/adapters/naive_baseline.py +68 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/adapters/openai_rag_adapter.py +110 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/adapters/rag_baseline.py +77 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/adapters/structured_memory.py +154 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/bmb_cli.py +220 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/data/claim_pairs.json +602 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/dataset.py +95 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/interface.py +146 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/metrics.py +129 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/protocols.py +14 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/runner.py +207 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/scenarios/__init__.py +0 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/scenarios/data/bmb_scenarios.json +2640 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/scenarios/data/scenarios.json +1069 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/scenarios/loader.py +93 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/scenarios/schema.py +73 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/scoring.py +307 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/system_cli.py +94 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/system_report.py +95 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/system_runner.py +167 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/task_evals/__init__.py +1 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/task_evals/__main__.py +118 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/task_evals/base.py +61 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/task_evals/data/preference_scenarios.json +538 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/task_evals/data/qa_scenarios.json +449 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/task_evals/long_horizon_qa.py +49 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/task_evals/preference_tracking.py +49 -0
- mnemebrain_benchmark-0.1.0a1/src/mnemebrain_benchmark/task_evals/runner.py +129 -0
- mnemebrain_benchmark-0.1.0a1/tests/__init__.py +0 -0
- mnemebrain_benchmark-0.1.0a1/tests/conftest.py +5 -0
- mnemebrain_benchmark-0.1.0a1/tests/helpers.py +46 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_cli.py +110 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_dataset.py +146 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_interface.py +152 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_langchain_buffer.py +50 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_loader.py +121 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_mem0_adapter.py +164 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_metrics.py +106 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_mnemebrain_adapter.py +250 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_naive_baseline.py +60 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_openai_rag_adapter.py +111 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_protocols.py +26 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_rag_baseline.py +63 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_runner.py +54 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_schema.py +82 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_scoring.py +348 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_structured_memory.py +133 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_system_report.py +77 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_system_runner.py +284 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_task_evals.py +118 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_task_evals_runner.py +108 -0
- mnemebrain_benchmark-0.1.0a1/tests/test_task_evals_scenarios.py +134 -0
- mnemebrain_benchmark-0.1.0a1/uv.lock +1934 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
name: Bug Report
|
|
2
|
+
description: Report a bug in mnemebrain-benchmark
|
|
3
|
+
labels: ["bug"]
|
|
4
|
+
body:
|
|
5
|
+
- type: markdown
|
|
6
|
+
attributes:
|
|
7
|
+
value: |
|
|
8
|
+
Thanks for reporting a bug. Please fill in the details below.
|
|
9
|
+
|
|
10
|
+
- type: input
|
|
11
|
+
id: version
|
|
12
|
+
attributes:
|
|
13
|
+
label: Package version
|
|
14
|
+
description: Output of `pip show mnemebrain-benchmark | grep Version`
|
|
15
|
+
placeholder: "0.1.0"
|
|
16
|
+
validations:
|
|
17
|
+
required: true
|
|
18
|
+
|
|
19
|
+
- type: input
|
|
20
|
+
id: python
|
|
21
|
+
attributes:
|
|
22
|
+
label: Python version
|
|
23
|
+
description: Output of `python --version`
|
|
24
|
+
placeholder: "3.12.0"
|
|
25
|
+
validations:
|
|
26
|
+
required: true
|
|
27
|
+
|
|
28
|
+
- type: dropdown
|
|
29
|
+
id: component
|
|
30
|
+
attributes:
|
|
31
|
+
label: Component
|
|
32
|
+
options:
|
|
33
|
+
- System Benchmark
|
|
34
|
+
- BMB (Belief Maintenance Benchmark)
|
|
35
|
+
- Embedding Benchmark
|
|
36
|
+
- Task Evaluations
|
|
37
|
+
- Adapter (specify in description)
|
|
38
|
+
- Scoring / Metrics
|
|
39
|
+
- CLI
|
|
40
|
+
- Other
|
|
41
|
+
validations:
|
|
42
|
+
required: true
|
|
43
|
+
|
|
44
|
+
- type: textarea
|
|
45
|
+
id: description
|
|
46
|
+
attributes:
|
|
47
|
+
label: Description
|
|
48
|
+
description: What happened and what did you expect?
|
|
49
|
+
validations:
|
|
50
|
+
required: true
|
|
51
|
+
|
|
52
|
+
- type: textarea
|
|
53
|
+
id: reproduce
|
|
54
|
+
attributes:
|
|
55
|
+
label: Steps to reproduce
|
|
56
|
+
description: Minimal code or commands to reproduce the issue.
|
|
57
|
+
render: python
|
|
58
|
+
validations:
|
|
59
|
+
required: true
|
|
60
|
+
|
|
61
|
+
- type: textarea
|
|
62
|
+
id: logs
|
|
63
|
+
attributes:
|
|
64
|
+
label: Error output
|
|
65
|
+
description: Paste any relevant error messages or tracebacks.
|
|
66
|
+
render: text
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
name: Feature Request
|
|
2
|
+
description: Suggest a new feature or improvement
|
|
3
|
+
labels: ["enhancement"]
|
|
4
|
+
body:
|
|
5
|
+
- type: markdown
|
|
6
|
+
attributes:
|
|
7
|
+
value: |
|
|
8
|
+
Thanks for suggesting an improvement to mnemebrain-benchmark.
|
|
9
|
+
|
|
10
|
+
- type: dropdown
|
|
11
|
+
id: category
|
|
12
|
+
attributes:
|
|
13
|
+
label: Category
|
|
14
|
+
options:
|
|
15
|
+
- New adapter
|
|
16
|
+
- New benchmark scenario
|
|
17
|
+
- New metric or scoring method
|
|
18
|
+
- CLI improvement
|
|
19
|
+
- Documentation
|
|
20
|
+
- Other
|
|
21
|
+
validations:
|
|
22
|
+
required: true
|
|
23
|
+
|
|
24
|
+
- type: textarea
|
|
25
|
+
id: description
|
|
26
|
+
attributes:
|
|
27
|
+
label: Description
|
|
28
|
+
description: What would you like to see added or changed?
|
|
29
|
+
validations:
|
|
30
|
+
required: true
|
|
31
|
+
|
|
32
|
+
- type: textarea
|
|
33
|
+
id: motivation
|
|
34
|
+
attributes:
|
|
35
|
+
label: Motivation
|
|
36
|
+
description: Why is this useful? What problem does it solve?
|
|
37
|
+
validations:
|
|
38
|
+
required: true
|
|
39
|
+
|
|
40
|
+
- type: textarea
|
|
41
|
+
id: alternatives
|
|
42
|
+
attributes:
|
|
43
|
+
label: Alternatives considered
|
|
44
|
+
description: Have you considered any alternative solutions?
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
name: New Adapter Proposal
|
|
2
|
+
description: Propose a new memory system adapter for the benchmark
|
|
3
|
+
labels: ["adapter", "enhancement"]
|
|
4
|
+
body:
|
|
5
|
+
- type: markdown
|
|
6
|
+
attributes:
|
|
7
|
+
value: |
|
|
8
|
+
Propose adding a new memory system adapter to the benchmark suite.
|
|
9
|
+
See [docs/adding-adapters.md](../../docs/adding-adapters.md) for the implementation guide.
|
|
10
|
+
|
|
11
|
+
- type: input
|
|
12
|
+
id: system_name
|
|
13
|
+
attributes:
|
|
14
|
+
label: Memory system name
|
|
15
|
+
placeholder: "e.g. chromadb, weaviate, pinecone"
|
|
16
|
+
validations:
|
|
17
|
+
required: true
|
|
18
|
+
|
|
19
|
+
- type: checkboxes
|
|
20
|
+
id: capabilities
|
|
21
|
+
attributes:
|
|
22
|
+
label: Capabilities
|
|
23
|
+
description: Which capabilities does this system support?
|
|
24
|
+
options:
|
|
25
|
+
- label: "store"
|
|
26
|
+
- label: "query"
|
|
27
|
+
- label: "retract"
|
|
28
|
+
- label: "explain"
|
|
29
|
+
- label: "contradiction"
|
|
30
|
+
- label: "decay"
|
|
31
|
+
- label: "revise"
|
|
32
|
+
- label: "sandbox"
|
|
33
|
+
- label: "attack"
|
|
34
|
+
- label: "consolidation"
|
|
35
|
+
- label: "hipporag"
|
|
36
|
+
- label: "pattern_separation"
|
|
37
|
+
|
|
38
|
+
- type: dropdown
|
|
39
|
+
id: dependency_type
|
|
40
|
+
attributes:
|
|
41
|
+
label: Dependency type
|
|
42
|
+
options:
|
|
43
|
+
- Local only (no API key needed)
|
|
44
|
+
- Cloud API (requires API key)
|
|
45
|
+
- Hybrid
|
|
46
|
+
validations:
|
|
47
|
+
required: true
|
|
48
|
+
|
|
49
|
+
- type: textarea
|
|
50
|
+
id: description
|
|
51
|
+
attributes:
|
|
52
|
+
label: Description
|
|
53
|
+
description: Brief description of the system and why it's a useful benchmark target.
|
|
54
|
+
validations:
|
|
55
|
+
required: true
|
|
56
|
+
|
|
57
|
+
- type: textarea
|
|
58
|
+
id: implementation_notes
|
|
59
|
+
attributes:
|
|
60
|
+
label: Implementation notes
|
|
61
|
+
description: Any relevant details about the adapter implementation.
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
concurrency:
|
|
10
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
11
|
+
cancel-in-progress: true
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
quality:
|
|
15
|
+
name: Lint & Type Check
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
- uses: astral-sh/setup-uv@v4
|
|
20
|
+
with:
|
|
21
|
+
version: "latest"
|
|
22
|
+
- uses: actions/setup-python@v5
|
|
23
|
+
with:
|
|
24
|
+
python-version: "3.12"
|
|
25
|
+
|
|
26
|
+
- name: Install dependencies
|
|
27
|
+
run: uv sync --extra dev
|
|
28
|
+
|
|
29
|
+
- name: Lint with ruff
|
|
30
|
+
run: uv run ruff check src/ tests/
|
|
31
|
+
|
|
32
|
+
- name: Format check with ruff
|
|
33
|
+
run: uv run ruff format --check src/ tests/
|
|
34
|
+
|
|
35
|
+
- name: Type check with mypy
|
|
36
|
+
run: uv run mypy src/mnemebrain_benchmark/
|
|
37
|
+
|
|
38
|
+
test:
|
|
39
|
+
name: Tests (Python ${{ matrix.python-version }})
|
|
40
|
+
needs: quality
|
|
41
|
+
runs-on: ubuntu-latest
|
|
42
|
+
strategy:
|
|
43
|
+
matrix:
|
|
44
|
+
python-version: ["3.12", "3.13"]
|
|
45
|
+
steps:
|
|
46
|
+
- uses: actions/checkout@v4
|
|
47
|
+
- uses: astral-sh/setup-uv@v4
|
|
48
|
+
with:
|
|
49
|
+
version: "latest"
|
|
50
|
+
- uses: actions/setup-python@v5
|
|
51
|
+
with:
|
|
52
|
+
python-version: ${{ matrix.python-version }}
|
|
53
|
+
|
|
54
|
+
- name: Install dependencies
|
|
55
|
+
run: uv sync --extra dev
|
|
56
|
+
|
|
57
|
+
- name: Run tests with coverage
|
|
58
|
+
run: uv run pytest --cov=mnemebrain_benchmark --cov-report=xml --cov-report=term-missing -q
|
|
59
|
+
|
|
60
|
+
- name: Upload coverage
|
|
61
|
+
if: matrix.python-version == '3.12'
|
|
62
|
+
uses: actions/upload-artifact@v4
|
|
63
|
+
with:
|
|
64
|
+
name: coverage-report
|
|
65
|
+
path: coverage.xml
|
|
66
|
+
|
|
67
|
+
coverage-gate:
|
|
68
|
+
name: Coverage Gate
|
|
69
|
+
needs: test
|
|
70
|
+
runs-on: ubuntu-latest
|
|
71
|
+
steps:
|
|
72
|
+
- uses: actions/checkout@v4
|
|
73
|
+
- uses: astral-sh/setup-uv@v4
|
|
74
|
+
with:
|
|
75
|
+
version: "latest"
|
|
76
|
+
- uses: actions/setup-python@v5
|
|
77
|
+
with:
|
|
78
|
+
python-version: "3.12"
|
|
79
|
+
|
|
80
|
+
- name: Install dependencies
|
|
81
|
+
run: uv sync --extra dev
|
|
82
|
+
|
|
83
|
+
- name: Check coverage threshold
|
|
84
|
+
run: uv run pytest --cov=mnemebrain_benchmark --cov-fail-under=80 -q
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
name: "CodeQL Advanced"
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ "main" ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ "main" ]
|
|
8
|
+
schedule:
|
|
9
|
+
- cron: '15 0 * * 4'
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
analyze:
|
|
13
|
+
name: Analyze (${{ matrix.language }})
|
|
14
|
+
runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
|
|
15
|
+
permissions:
|
|
16
|
+
security-events: write
|
|
17
|
+
packages: read
|
|
18
|
+
actions: read
|
|
19
|
+
contents: read
|
|
20
|
+
|
|
21
|
+
strategy:
|
|
22
|
+
fail-fast: false
|
|
23
|
+
matrix:
|
|
24
|
+
include:
|
|
25
|
+
- language: actions
|
|
26
|
+
build-mode: none
|
|
27
|
+
- language: python
|
|
28
|
+
build-mode: none
|
|
29
|
+
|
|
30
|
+
steps:
|
|
31
|
+
- name: Checkout repository
|
|
32
|
+
uses: actions/checkout@v4
|
|
33
|
+
|
|
34
|
+
- name: Initialize CodeQL
|
|
35
|
+
uses: github/codeql-action/init@v4
|
|
36
|
+
with:
|
|
37
|
+
languages: ${{ matrix.language }}
|
|
38
|
+
build-mode: ${{ matrix.build-mode }}
|
|
39
|
+
|
|
40
|
+
- name: Run manual build steps
|
|
41
|
+
if: matrix.build-mode == 'manual'
|
|
42
|
+
shell: bash
|
|
43
|
+
run: |
|
|
44
|
+
echo 'If you are using a "manual" build mode for one or more of the' \
|
|
45
|
+
'languages you are analyzing, replace this with the commands to build' \
|
|
46
|
+
'your code, for example:'
|
|
47
|
+
echo ' make bootstrap'
|
|
48
|
+
echo ' make release'
|
|
49
|
+
exit 1
|
|
50
|
+
|
|
51
|
+
- name: Perform CodeQL Analysis
|
|
52
|
+
uses: github/codeql-action/analyze@v4
|
|
53
|
+
with:
|
|
54
|
+
category: "/language:${{matrix.language}}"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
name: 'Dependency review'
|
|
2
|
+
on:
|
|
3
|
+
pull_request:
|
|
4
|
+
branches: [ "main" ]
|
|
5
|
+
|
|
6
|
+
permissions:
|
|
7
|
+
contents: read
|
|
8
|
+
pull-requests: write
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
dependency-review:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- name: 'Checkout repository'
|
|
15
|
+
uses: actions/checkout@v4
|
|
16
|
+
- name: 'Dependency Review'
|
|
17
|
+
uses: actions/dependency-review-action@v4
|
|
18
|
+
with:
|
|
19
|
+
comment-summary-in-pr: always
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: Pylint
|
|
2
|
+
|
|
3
|
+
on: [push]
|
|
4
|
+
|
|
5
|
+
jobs:
|
|
6
|
+
build:
|
|
7
|
+
runs-on: ubuntu-latest
|
|
8
|
+
strategy:
|
|
9
|
+
matrix:
|
|
10
|
+
python-version: ["3.12", "3.13"]
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
|
|
14
|
+
- name: Install uv
|
|
15
|
+
uses: astral-sh/setup-uv@v4
|
|
16
|
+
with:
|
|
17
|
+
version: "latest"
|
|
18
|
+
|
|
19
|
+
- name: Set up Python
|
|
20
|
+
env:
|
|
21
|
+
PYTHON_VERSION: ${{ matrix.python-version }}
|
|
22
|
+
run: uv python install "$PYTHON_VERSION"
|
|
23
|
+
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: uv sync --extra dev && uv pip install pylint
|
|
26
|
+
|
|
27
|
+
- name: Analysing the code with pylint
|
|
28
|
+
run: >
|
|
29
|
+
uv run pylint
|
|
30
|
+
--disable=C0103,C0114,C0115,C0116,C0301,C0415,E0401,R0801,R0902,R0903,R0912,R0913,R0914,R0915,R0917,W0603,W0621,W0107,W0613,W0718,W2301
|
|
31
|
+
src/
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write
|
|
10
|
+
id-token: write
|
|
11
|
+
packages: write
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
test:
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
strategy:
|
|
17
|
+
matrix:
|
|
18
|
+
python-version: ["3.12", "3.13"]
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
|
|
22
|
+
- name: Install uv
|
|
23
|
+
uses: astral-sh/setup-uv@v4
|
|
24
|
+
with:
|
|
25
|
+
version: "latest"
|
|
26
|
+
|
|
27
|
+
- name: Set up Python
|
|
28
|
+
env:
|
|
29
|
+
PYTHON_VERSION: ${{ matrix.python-version }}
|
|
30
|
+
run: uv python install "$PYTHON_VERSION"
|
|
31
|
+
|
|
32
|
+
- name: Install dependencies
|
|
33
|
+
run: uv sync --extra dev
|
|
34
|
+
|
|
35
|
+
- name: Run tests
|
|
36
|
+
run: uv run pytest tests/ -v
|
|
37
|
+
|
|
38
|
+
build:
|
|
39
|
+
needs: test
|
|
40
|
+
runs-on: ubuntu-latest
|
|
41
|
+
steps:
|
|
42
|
+
- uses: actions/checkout@v4
|
|
43
|
+
|
|
44
|
+
- name: Install uv
|
|
45
|
+
uses: astral-sh/setup-uv@v4
|
|
46
|
+
with:
|
|
47
|
+
version: "latest"
|
|
48
|
+
|
|
49
|
+
- name: Set up Python
|
|
50
|
+
run: uv python install 3.12
|
|
51
|
+
|
|
52
|
+
- name: Build package
|
|
53
|
+
run: uv build
|
|
54
|
+
|
|
55
|
+
- name: Upload dist artifacts
|
|
56
|
+
uses: actions/upload-artifact@v4
|
|
57
|
+
with:
|
|
58
|
+
name: dist
|
|
59
|
+
path: dist/
|
|
60
|
+
|
|
61
|
+
publish-pypi:
|
|
62
|
+
needs: build
|
|
63
|
+
runs-on: ubuntu-latest
|
|
64
|
+
environment: pypi
|
|
65
|
+
permissions:
|
|
66
|
+
id-token: write
|
|
67
|
+
steps:
|
|
68
|
+
- name: Download dist artifacts
|
|
69
|
+
uses: actions/download-artifact@v4
|
|
70
|
+
with:
|
|
71
|
+
name: dist
|
|
72
|
+
path: dist/
|
|
73
|
+
|
|
74
|
+
- name: Publish to PyPI
|
|
75
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
76
|
+
|
|
77
|
+
publish-github:
|
|
78
|
+
needs: build
|
|
79
|
+
runs-on: ubuntu-latest
|
|
80
|
+
permissions:
|
|
81
|
+
contents: write
|
|
82
|
+
packages: write
|
|
83
|
+
steps:
|
|
84
|
+
- uses: actions/checkout@v4
|
|
85
|
+
|
|
86
|
+
- name: Download dist artifacts
|
|
87
|
+
uses: actions/download-artifact@v4
|
|
88
|
+
with:
|
|
89
|
+
name: dist
|
|
90
|
+
path: dist/
|
|
91
|
+
|
|
92
|
+
- name: Create GitHub Release
|
|
93
|
+
uses: softprops/action-gh-release@v2
|
|
94
|
+
with:
|
|
95
|
+
files: dist/*
|
|
96
|
+
generate_release_notes: true
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.pyc
|
|
3
|
+
*.pyo
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
.venv/
|
|
8
|
+
.env
|
|
9
|
+
.coverage
|
|
10
|
+
coverage.xml
|
|
11
|
+
htmlcov/
|
|
12
|
+
.pytest_cache/
|
|
13
|
+
.mypy_cache/
|
|
14
|
+
.ruff_cache/
|
|
15
|
+
*.json
|
|
16
|
+
!src/mnemebrain_benchmark/scenarios/data/*.json
|
|
17
|
+
!src/mnemebrain_benchmark/task_evals/data/*.json
|
|
18
|
+
!src/mnemebrain_benchmark/data/*.json
|
|
19
|
+
.dirigent/
|