pdbminebuilder 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdbminebuilder-0.2.0/.env.example +9 -0
- pdbminebuilder-0.2.0/.gitattributes +2 -0
- pdbminebuilder-0.2.0/.github/workflows/ci.yml +45 -0
- pdbminebuilder-0.2.0/.github/workflows/deploy-docs.yml +52 -0
- pdbminebuilder-0.2.0/.github/workflows/release.yml +68 -0
- pdbminebuilder-0.2.0/.gitignore +37 -0
- pdbminebuilder-0.2.0/CHANGELOG.md +35 -0
- pdbminebuilder-0.2.0/CLAUDE.md +309 -0
- pdbminebuilder-0.2.0/LICENSE +21 -0
- pdbminebuilder-0.2.0/PKG-INFO +136 -0
- pdbminebuilder-0.2.0/README.md +102 -0
- pdbminebuilder-0.2.0/alembic/README +1 -0
- pdbminebuilder-0.2.0/alembic/env.py +116 -0
- pdbminebuilder-0.2.0/alembic/script.py.mako +28 -0
- pdbminebuilder-0.2.0/alembic/versions/bf39ba596b4a_prd_family_replace_docid_with_name_in_.py +35 -0
- pdbminebuilder-0.2.0/alembic.ini +137 -0
- pdbminebuilder-0.2.0/config.example.yml +47 -0
- pdbminebuilder-0.2.0/config.test.yml +27 -0
- pdbminebuilder-0.2.0/docker/docker-compose.test.yml +17 -0
- pdbminebuilder-0.2.0/docker/init/01-extensions.sql +2 -0
- pdbminebuilder-0.2.0/docs/architecture.md +157 -0
- pdbminebuilder-0.2.0/docs/pipelines.md +332 -0
- pdbminebuilder-0.2.0/docs/rdb_docs/cc.yml +521 -0
- pdbminebuilder-0.2.0/docs/rdb_docs/ccmodel.yml +192 -0
- pdbminebuilder-0.2.0/docs/rdb_docs/contacts.yml +54 -0
- pdbminebuilder-0.2.0/docs/rdb_docs/emdb.yml +2872 -0
- pdbminebuilder-0.2.0/docs/rdb_docs/ihm.yml +3721 -0
- pdbminebuilder-0.2.0/docs/rdb_docs/pdbj.yml +12632 -0
- pdbminebuilder-0.2.0/docs/rdb_docs/prd.yml +687 -0
- pdbminebuilder-0.2.0/docs/rdb_docs/prd_family.yml +279 -0
- pdbminebuilder-0.2.0/docs/rdb_docs/vrpt.yml +2763 -0
- pdbminebuilder-0.2.0/docs/schema.md +201 -0
- pdbminebuilder-0.2.0/pixi.lock +3073 -0
- pdbminebuilder-0.2.0/pixi.toml +71 -0
- pdbminebuilder-0.2.0/pyproject.toml +54 -0
- pdbminebuilder-0.2.0/schemas/pkout.json +1 -0
- pdbminebuilder-0.2.0/scripts/cif2json.py +142 -0
- pdbminebuilder-0.2.0/scripts/convert_yaml_to_sa.py +572 -0
- pdbminebuilder-0.2.0/scripts/generate_rdb_docs.py +203 -0
- pdbminebuilder-0.2.0/scripts/generate_schema_docs.py +234 -0
- pdbminebuilder-0.2.0/scripts/init_rdkit.sql +54 -0
- pdbminebuilder-0.2.0/scripts/inject_column_comments.py +160 -0
- pdbminebuilder-0.2.0/scripts/postgresql_bulkload.conf +35 -0
- pdbminebuilder-0.2.0/scripts/postgresql_safe.conf +20 -0
- pdbminebuilder-0.2.0/scripts/postgresql_tuned.conf +69 -0
- pdbminebuilder-0.2.0/scripts/rdkit_functions.sql +221 -0
- pdbminebuilder-0.2.0/scripts/reload_failed.py +190 -0
- pdbminebuilder-0.2.0/scripts/verify_schema_equivalence.py +411 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/__init__.py +3 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/__main__.py +6 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/cli.py +369 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/commands/__init__.py +1 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/commands/load.py +164 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/commands/reset.py +101 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/commands/stats.py +147 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/commands/sync.py +183 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/commands/test.py +185 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/commands/update.py +212 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/commands/utils.py +47 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/config.py +140 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/db/__init__.py +5 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/db/_type_utils.py +62 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/db/connection.py +128 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/db/delta.py +764 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/db/loader.py +680 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/db/metadata.py +188 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/models/__init__.py +53 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/models/cc.py +794 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/models/ccmodel.py +283 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/models/contacts.py +87 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/models/emdb.py +3987 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/models/ihm.py +2079 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/models/pdbj.py +16913 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/models/prd.py +1044 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/models/prd_family.py +414 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/models/vrpt.py +4077 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/parsers/__init__.py +20 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/parsers/cif.py +205 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/parsers/mmjson.py +246 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/pipelines/__init__.py +1 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/pipelines/base.py +861 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/pipelines/cc.py +822 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/pipelines/ccmodel.py +508 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/pipelines/contacts.py +340 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/pipelines/emdb.py +413 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/pipelines/ihm.py +508 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/pipelines/pdbj.py +820 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/pipelines/prd.py +619 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/pipelines/prd_family.py +255 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/pipelines/vrpt.py +341 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/py.typed +0 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/utils/__init__.py +1 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/utils/assembly.py +232 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/utils/brief_summary.py +365 -0
- pdbminebuilder-0.2.0/src/pdbminebuilder/utils/patches.py +61 -0
- pdbminebuilder-0.2.0/tests/__init__.py +1 -0
- pdbminebuilder-0.2.0/tests/conftest.py +260 -0
- pdbminebuilder-0.2.0/tests/fixtures/cc/ATP.json.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/cc/EOH.json.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/cc/HOH.json.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/ccmodel/M_6EL_00001.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/ccmodel/M_94M_00001.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/ccmodel/M_DAL_00001.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/ccmodel/M_EOH_00001.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/pdbj/1crn.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/pdbj/1ubq.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/pdbj/4hhb.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/pdbj/5pti.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/prd/PRDCC_000001.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/prd/PRDCC_000006.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/prd/PRDCC_000007.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/prd/PRD_000001.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/prd/PRD_000006.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/prd/PRD_000007.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/fixtures/prd_family/family-all.cif.gz +0 -0
- pdbminebuilder-0.2.0/tests/integration/__init__.py +5 -0
- pdbminebuilder-0.2.0/tests/integration/test_cc_integration.py +229 -0
- pdbminebuilder-0.2.0/tests/integration/test_ccmodel_integration.py +215 -0
- pdbminebuilder-0.2.0/tests/integration/test_pdbj_integration.py +239 -0
- pdbminebuilder-0.2.0/tests/integration/test_prd_integration.py +221 -0
- pdbminebuilder-0.2.0/tests/test_assembly.py +288 -0
- pdbminebuilder-0.2.0/tests/test_base.py +314 -0
- pdbminebuilder-0.2.0/tests/test_brief_summary.py +308 -0
- pdbminebuilder-0.2.0/tests/test_cc_cif.py +700 -0
- pdbminebuilder-0.2.0/tests/test_ccmodel_cif.py +327 -0
- pdbminebuilder-0.2.0/tests/test_commands.py +279 -0
- pdbminebuilder-0.2.0/tests/test_delta.py +456 -0
- pdbminebuilder-0.2.0/tests/test_emdb.py +393 -0
- pdbminebuilder-0.2.0/tests/test_environment.py +69 -0
- pdbminebuilder-0.2.0/tests/test_format_parity.py +359 -0
- pdbminebuilder-0.2.0/tests/test_ihm.py +554 -0
- pdbminebuilder-0.2.0/tests/test_loader_migration.py +139 -0
- pdbminebuilder-0.2.0/tests/test_metadata.py +265 -0
- pdbminebuilder-0.2.0/tests/test_mmjson.py +244 -0
- pdbminebuilder-0.2.0/tests/test_model_registry.py +140 -0
- pdbminebuilder-0.2.0/tests/test_mtime_filtering.py +170 -0
- pdbminebuilder-0.2.0/tests/test_parsers.py +227 -0
- pdbminebuilder-0.2.0/tests/test_patches.py +75 -0
- pdbminebuilder-0.2.0/tests/test_pdbj.py +527 -0
- pdbminebuilder-0.2.0/tests/test_pdbj_cif.py +1180 -0
- pdbminebuilder-0.2.0/tests/test_prd_cif.py +443 -0
- pdbminebuilder-0.2.0/tests/test_prd_family_cif.py +296 -0
- pdbminebuilder-0.2.0/tests/test_type_utils.py +69 -0
- pdbminebuilder-0.2.0/website/.gitignore +20 -0
- pdbminebuilder-0.2.0/website/README.md +41 -0
- pdbminebuilder-0.2.0/website/docs/database/cc.mdx +80 -0
- pdbminebuilder-0.2.0/website/docs/database/ccmodel.mdx +14 -0
- pdbminebuilder-0.2.0/website/docs/database/contacts.mdx +14 -0
- pdbminebuilder-0.2.0/website/docs/database/emdb.mdx +14 -0
- pdbminebuilder-0.2.0/website/docs/database/ihm.mdx +14 -0
- pdbminebuilder-0.2.0/website/docs/database/overview.md +84 -0
- pdbminebuilder-0.2.0/website/docs/database/pdbj.mdx +14 -0
- pdbminebuilder-0.2.0/website/docs/database/prd.mdx +14 -0
- pdbminebuilder-0.2.0/website/docs/database/prd_family.mdx +14 -0
- pdbminebuilder-0.2.0/website/docs/database/vrpt.mdx +14 -0
- pdbminebuilder-0.2.0/website/docs/getting-started/configuration.md +145 -0
- pdbminebuilder-0.2.0/website/docs/getting-started/installation.md +155 -0
- pdbminebuilder-0.2.0/website/docs/getting-started/migration.md +207 -0
- pdbminebuilder-0.2.0/website/docs/getting-started/sync.md +99 -0
- pdbminebuilder-0.2.0/website/docs/getting-started/update.md +188 -0
- pdbminebuilder-0.2.0/website/docusaurus.config.ts +88 -0
- pdbminebuilder-0.2.0/website/package-lock.json +18448 -0
- pdbminebuilder-0.2.0/website/package.json +47 -0
- pdbminebuilder-0.2.0/website/sidebars.ts +39 -0
- pdbminebuilder-0.2.0/website/src/components/SchemaFilter.tsx +89 -0
- pdbminebuilder-0.2.0/website/src/css/custom.css +133 -0
- pdbminebuilder-0.2.0/website/src/pages/index.module.css +23 -0
- pdbminebuilder-0.2.0/website/src/pages/index.tsx +86 -0
- pdbminebuilder-0.2.0/website/src/pages/schema-search.tsx +250 -0
- pdbminebuilder-0.2.0/website/src/types/schema.ts +10 -0
- pdbminebuilder-0.2.0/website/static/.nojekyll +0 -0
- pdbminebuilder-0.2.0/website/static/data/allSchemas.json +1 -0
- pdbminebuilder-0.2.0/website/static/img/docusaurus-social-card.jpg +0 -0
- pdbminebuilder-0.2.0/website/static/img/docusaurus.png +0 -0
- pdbminebuilder-0.2.0/website/static/img/favicon.ico +0 -0
- pdbminebuilder-0.2.0/website/static/img/logo.svg +1 -0
- pdbminebuilder-0.2.0/website/static/img/undraw_docusaurus_mountain.svg +171 -0
- pdbminebuilder-0.2.0/website/static/img/undraw_docusaurus_react.svg +170 -0
- pdbminebuilder-0.2.0/website/static/img/undraw_docusaurus_tree.svg +40 -0
- pdbminebuilder-0.2.0/website/tsconfig.json +8 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
paths-ignore:
|
|
7
|
+
- "*.md"
|
|
8
|
+
- "docs/**"
|
|
9
|
+
- "website/**"
|
|
10
|
+
- "LICENSE"
|
|
11
|
+
pull_request:
|
|
12
|
+
branches: [main]
|
|
13
|
+
paths-ignore:
|
|
14
|
+
- "*.md"
|
|
15
|
+
- "docs/**"
|
|
16
|
+
- "website/**"
|
|
17
|
+
- "LICENSE"
|
|
18
|
+
|
|
19
|
+
jobs:
|
|
20
|
+
lint:
|
|
21
|
+
runs-on: ubuntu-latest
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- uses: prefix-dev/setup-pixi@v0.9.2
|
|
26
|
+
with:
|
|
27
|
+
cache: true
|
|
28
|
+
|
|
29
|
+
- name: Ruff check
|
|
30
|
+
run: pixi run lint
|
|
31
|
+
|
|
32
|
+
- name: Ruff format check
|
|
33
|
+
run: pixi run format-check
|
|
34
|
+
|
|
35
|
+
test:
|
|
36
|
+
runs-on: ubuntu-latest
|
|
37
|
+
steps:
|
|
38
|
+
- uses: actions/checkout@v4
|
|
39
|
+
|
|
40
|
+
- uses: prefix-dev/setup-pixi@v0.9.2
|
|
41
|
+
with:
|
|
42
|
+
cache: true
|
|
43
|
+
|
|
44
|
+
- name: Run unit tests
|
|
45
|
+
run: pixi run test-unit
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
name: Deploy Docs
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
paths: ["website/**"]
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: read
|
|
10
|
+
pages: write
|
|
11
|
+
id-token: write
|
|
12
|
+
|
|
13
|
+
concurrency:
|
|
14
|
+
group: pages
|
|
15
|
+
cancel-in-progress: false
|
|
16
|
+
|
|
17
|
+
jobs:
|
|
18
|
+
build:
|
|
19
|
+
runs-on: ubuntu-latest
|
|
20
|
+
defaults:
|
|
21
|
+
run:
|
|
22
|
+
working-directory: website
|
|
23
|
+
steps:
|
|
24
|
+
- uses: actions/checkout@v4
|
|
25
|
+
with:
|
|
26
|
+
fetch-depth: 0
|
|
27
|
+
|
|
28
|
+
- uses: actions/setup-node@v4
|
|
29
|
+
with:
|
|
30
|
+
node-version: 22
|
|
31
|
+
cache: npm
|
|
32
|
+
cache-dependency-path: website/package-lock.json
|
|
33
|
+
|
|
34
|
+
- run: npm ci
|
|
35
|
+
- run: npm run build
|
|
36
|
+
|
|
37
|
+
- uses: actions/upload-pages-artifact@v3
|
|
38
|
+
with:
|
|
39
|
+
path: website/build
|
|
40
|
+
|
|
41
|
+
deploy:
|
|
42
|
+
needs: build
|
|
43
|
+
permissions:
|
|
44
|
+
pages: write
|
|
45
|
+
id-token: write
|
|
46
|
+
environment:
|
|
47
|
+
name: github-pages
|
|
48
|
+
url: ${{ steps.deployment.outputs.page_url }}
|
|
49
|
+
runs-on: ubuntu-latest
|
|
50
|
+
steps:
|
|
51
|
+
- uses: actions/deploy-pages@v4
|
|
52
|
+
id: deployment
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write
|
|
10
|
+
id-token: write # Required for trusted publishing to PyPI
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
release:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Extract version from tag
|
|
19
|
+
id: version
|
|
20
|
+
run: echo "version=${GITHUB_REF_NAME#v}" >> "$GITHUB_OUTPUT"
|
|
21
|
+
|
|
22
|
+
- name: Extract release notes from CHANGELOG.md
|
|
23
|
+
id: changelog
|
|
24
|
+
run: |
|
|
25
|
+
# Extract the section for the tagged version
|
|
26
|
+
version="${{ steps.version.outputs.version }}"
|
|
27
|
+
notes=$(awk -v ver="$version" '
|
|
28
|
+
/^## \[/ {
|
|
29
|
+
if (found) exit
|
|
30
|
+
if ($0 ~ "\\[" ver "\\]") found=1; next
|
|
31
|
+
}
|
|
32
|
+
found { print }
|
|
33
|
+
' CHANGELOG.md)
|
|
34
|
+
|
|
35
|
+
if [ -z "$notes" ]; then
|
|
36
|
+
echo "::warning::No CHANGELOG entry found for version $version"
|
|
37
|
+
notes="Release $version"
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
# Write to file for gh release
|
|
41
|
+
echo "$notes" > release_notes.md
|
|
42
|
+
|
|
43
|
+
- name: Create GitHub Release
|
|
44
|
+
run: |
|
|
45
|
+
gh release create "$GITHUB_REF_NAME" \
|
|
46
|
+
--title "$GITHUB_REF_NAME" \
|
|
47
|
+
--notes-file release_notes.md
|
|
48
|
+
env:
|
|
49
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
50
|
+
|
|
51
|
+
publish:
|
|
52
|
+
runs-on: ubuntu-latest
|
|
53
|
+
needs: release
|
|
54
|
+
environment: pypi
|
|
55
|
+
steps:
|
|
56
|
+
- uses: actions/checkout@v4
|
|
57
|
+
|
|
58
|
+
- uses: actions/setup-python@v5
|
|
59
|
+
with:
|
|
60
|
+
python-version: "3.12"
|
|
61
|
+
|
|
62
|
+
- name: Build package
|
|
63
|
+
run: |
|
|
64
|
+
pip install build
|
|
65
|
+
python -m build
|
|
66
|
+
|
|
67
|
+
- name: Publish to PyPI
|
|
68
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# pixi environments
|
|
2
|
+
.pixi/*
|
|
3
|
+
!.pixi/config.toml
|
|
4
|
+
|
|
5
|
+
# Python
|
|
6
|
+
__pycache__/
|
|
7
|
+
*.pyc
|
|
8
|
+
*.pyo
|
|
9
|
+
|
|
10
|
+
# Environment
|
|
11
|
+
.env
|
|
12
|
+
|
|
13
|
+
node_modules/
|
|
14
|
+
|
|
15
|
+
# Docusaurus
|
|
16
|
+
website/build/
|
|
17
|
+
website/.docusaurus/
|
|
18
|
+
website/node_modules/
|
|
19
|
+
data
|
|
20
|
+
!website/static/data/
|
|
21
|
+
logs/
|
|
22
|
+
postgres_data_5433
|
|
23
|
+
package-lock.json
|
|
24
|
+
!website/package-lock.json
|
|
25
|
+
setup-db.sh
|
|
26
|
+
config.yml
|
|
27
|
+
dist
|
|
28
|
+
.cursor
|
|
29
|
+
|
|
30
|
+
# Claude Code
|
|
31
|
+
plans/
|
|
32
|
+
|
|
33
|
+
# Build artifacts
|
|
34
|
+
*.egg-info/
|
|
35
|
+
|
|
36
|
+
# Lock files (pixi manages dependencies)
|
|
37
|
+
uv.lock
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.2.0] - 2026-03-07
|
|
11
|
+
|
|
12
|
+
Initial release as an independent Python project. Rewritten from
|
|
13
|
+
[mine2updater](https://gitlab.com/pdbjapan/mine2updater) (Node.js) by PDBj.
|
|
14
|
+
|
|
15
|
+
### Added
|
|
16
|
+
|
|
17
|
+
- 7 data pipelines: pdbj, cc, ccmodel, prd, prd_family, vrpt, contacts
|
|
18
|
+
- 2 schema-only definitions: emdb, ihm
|
|
19
|
+
- Dual format support (CIF / mmJSON) for pdbj, cc, ccmodel, prd pipelines
|
|
20
|
+
- Unified parsing via gemmi for both CIF and mmJSON
|
|
21
|
+
- Multi-process parallel loading with ProcessPoolExecutor
|
|
22
|
+
- Bulk load mode (COPY protocol) for initial data loading
|
|
23
|
+
- Mtime-based skip optimization for incremental updates
|
|
24
|
+
- RDKit PostgreSQL cartridge integration for chemical searches
|
|
25
|
+
- SMILES generation from molecular structure via ccd2rdmol
|
|
26
|
+
- SQLAlchemy Core schema definitions with Alembic migrations
|
|
27
|
+
- CLI with 9 commands: sync, update, load, all, setup-rdkit, test, reset, stats, version
|
|
28
|
+
- Pydantic-based configuration with YAML and environment variable support
|
|
29
|
+
- Documentation website with auto-generated schema docs
|
|
30
|
+
- Docker-based test environment (PostgreSQL + RDKit)
|
|
31
|
+
- PyPI publishing support with trusted publishing
|
|
32
|
+
- Environment version tests for Python and PostgreSQL
|
|
33
|
+
- Alternative installation methods (pip, conda+pip)
|
|
34
|
+
- `config.example.yml` with documented options
|
|
35
|
+
- MIT license
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
# pdb-mine-builder
|
|
2
|
+
|
|
3
|
+
A CLI tool that loads PDBj (Protein Data Bank Japan) data into PostgreSQL.
|
|
4
|
+
|
|
5
|
+
## Tech Stack
|
|
6
|
+
|
|
7
|
+
- **Language**: Python 3.12+
|
|
8
|
+
- **Package Manager**: Pixi (Conda/PyPI hybrid)
|
|
9
|
+
- **Database**: PostgreSQL 17+ (version managed by rdkit-postgresql), accessed via psycopg3
|
|
10
|
+
- **Schema**: SQLAlchemy Core (DDL only) + Alembic (migrations)
|
|
11
|
+
- **CLI**: Typer + Rich
|
|
12
|
+
- **Config**: Pydantic
|
|
13
|
+
- **Parser**: gemmi (CIF and mmJSON unified)
|
|
14
|
+
- **Chemistry**: RDKit PostgreSQL cartridge + ccd2rdmol
|
|
15
|
+
|
|
16
|
+
## Quick Start
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pixi install
|
|
20
|
+
pixi run pmb --help
|
|
21
|
+
pixi run pmb update pdbj --limit 10
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Project Structure
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
src/pdbminebuilder/
|
|
28
|
+
├── __init__.py
|
|
29
|
+
├── __main__.py # Entry point
|
|
30
|
+
├── cli.py # Typer CLI commands (9 commands)
|
|
31
|
+
├── config.py # Pydantic settings
|
|
32
|
+
├── models/
|
|
33
|
+
│ ├── __init__.py # MetaData registry (ALL_METADATA, get_metadata())
|
|
34
|
+
│ ├── cc.py # cc schema (10 tables)
|
|
35
|
+
│ ├── ccmodel.py # ccmodel schema
|
|
36
|
+
│ ├── contacts.py # contacts schema
|
|
37
|
+
│ ├── emdb.py # emdb schema (schema only, no pipeline)
|
|
38
|
+
│ ├── ihm.py # ihm schema (schema only, no pipeline)
|
|
39
|
+
│ ├── pdbj.py # pdbj schema (~400 tables)
|
|
40
|
+
│ ├── prd.py # prd schema
|
|
41
|
+
│ ├── prd_family.py # prd_family schema
|
|
42
|
+
│ └── vrpt.py # vrpt schema
|
|
43
|
+
├── db/
|
|
44
|
+
│ ├── connection.py # psycopg3 connection pool
|
|
45
|
+
│ ├── delta.py # Delta computing
|
|
46
|
+
│ ├── loader.py # Parallel loader (ProcessPoolExecutor)
|
|
47
|
+
│ ├── metadata.py # Entry metadata tracking (mtime-based skip)
|
|
48
|
+
│ └── _type_utils.py # SA type to PostgreSQL type converter
|
|
49
|
+
├── parsers/
|
|
50
|
+
│ ├── cif.py # Unified parser (CIF + mmJSON via gemmi)
|
|
51
|
+
│ └── mmjson.py # Utilities: normalize_column_name(), merge_data()
|
|
52
|
+
└── pipelines/
|
|
53
|
+
├── base.py # BasePipeline + transform_category()
|
|
54
|
+
├── pdbj.py # Main PDB data (CIF/mmJSON)
|
|
55
|
+
├── cc.py # Chemical components
|
|
56
|
+
├── ccmodel.py # Chemical component models
|
|
57
|
+
├── prd.py # BIRD data (dual data blocks)
|
|
58
|
+
├── prd_family.py # BIRD family data (CIF only)
|
|
59
|
+
├── vrpt.py # Validation reports (CIF)
|
|
60
|
+
└── contacts.py # Contact data (custom JSON)
|
|
61
|
+
alembic/ # Alembic migration config
|
|
62
|
+
├── env.py # Multi-schema support
|
|
63
|
+
└── versions/ # Migration scripts
|
|
64
|
+
docker/
|
|
65
|
+
├── docker-compose.test.yml # Test DB (PostgreSQL + RDKit)
|
|
66
|
+
└── init/
|
|
67
|
+
└── 01-extensions.sql # RDKit extension setup
|
|
68
|
+
scripts/ # One-shot utility scripts
|
|
69
|
+
tests/ # Unit + integration tests (pytest)
|
|
70
|
+
docs/ # Architecture docs
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Pipelines
|
|
74
|
+
|
|
75
|
+
### Pipeline List
|
|
76
|
+
|
|
77
|
+
| Pipeline | Default Format | Notes |
|
|
78
|
+
|----------|---------------|-------|
|
|
79
|
+
| pdbj | CIF | File-based (~248k files), atom_site skipped |
|
|
80
|
+
| cc | CIF | Single file (components.cif.gz), ~40k blocks |
|
|
81
|
+
| ccmodel | CIF | Single file (chem_comp_model.cif.gz) |
|
|
82
|
+
| prd | CIF | Dual file (prd-all.cif.gz + prdcc-all.cif.gz) |
|
|
83
|
+
| prd_family | CIF | Single file (family-all.cif.gz), CIF only |
|
|
84
|
+
| vrpt | CIF | Uses gemmi.CifWalk for nested directory structure |
|
|
85
|
+
| contacts | JSON | Array format, not mmJSON |
|
|
86
|
+
| emdb | - | Schema only, no pipeline implementation |
|
|
87
|
+
| ihm | - | Schema only, no pipeline implementation |
|
|
88
|
+
|
|
89
|
+
### Format Selection (Dual-Format Pipelines)
|
|
90
|
+
|
|
91
|
+
Pipelines pdbj, cc, ccmodel, prd support both CIF and mmJSON.
|
|
92
|
+
Format is selected via `format` field in `config.yml`:
|
|
93
|
+
|
|
94
|
+
```yaml
|
|
95
|
+
pipelines:
|
|
96
|
+
pdbj:
|
|
97
|
+
format: cif # "cif" (default) or "mmjson"
|
|
98
|
+
data: /path/to/data/
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Backward Compatibility
|
|
102
|
+
|
|
103
|
+
Legacy pipeline names (`pdbj-cif`, `cc-cif`, `pdbj-json`, `cc-json`, etc.)
|
|
104
|
+
are still accepted but deprecated. They emit a warning and resolve to the
|
|
105
|
+
base pipeline name. Note: `-json` aliases resolve to the base name but do NOT
|
|
106
|
+
change the `format` config — users must set `format: mmjson` in config.yml.
|
|
107
|
+
|
|
108
|
+
### Mtime-Based Skip Optimization
|
|
109
|
+
|
|
110
|
+
The `entry_metadata` table tracks file modification times. During incremental
|
|
111
|
+
updates, unchanged entries are automatically skipped. Use `--force` flag to
|
|
112
|
+
bypass mtime checks and reprocess all entries.
|
|
113
|
+
|
|
114
|
+
## CLI Commands
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
pixi run pmb sync <target> # Sync data via rsync
|
|
118
|
+
pixi run pmb update <pipeline> # Incremental update (--limit, --workers, --force)
|
|
119
|
+
pixi run pmb load <pipeline> # Bulk load via COPY protocol (initial load)
|
|
120
|
+
pixi run pmb all # Full sync + update cycle
|
|
121
|
+
pixi run pmb setup-rdkit # Setup RDKit extension
|
|
122
|
+
pixi run pmb test [pipeline...] # Run test pipelines against test DB
|
|
123
|
+
pixi run pmb reset <schema|all> # Drop and reset schemas
|
|
124
|
+
pixi run pmb stats # Show database statistics
|
|
125
|
+
pixi run pmb --version # Show version
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## Key Patterns
|
|
129
|
+
|
|
130
|
+
### Unified Parsing (gemmi)
|
|
131
|
+
Both CIF and mmJSON are parsed via gemmi, returning row-oriented dicts:
|
|
132
|
+
```python
|
|
133
|
+
from pdbminebuilder.parsers.cif import parse_cif_file, parse_mmjson_file
|
|
134
|
+
|
|
135
|
+
# CIF files (supports .cif.gz)
|
|
136
|
+
data = parse_cif_file(filepath)
|
|
137
|
+
|
|
138
|
+
# mmJSON files (supports .json.gz)
|
|
139
|
+
data = parse_mmjson_file(filepath)
|
|
140
|
+
|
|
141
|
+
# Both return: {"category": [{"col": "val", ...}, ...], "_block_name": "..."}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Column Name Normalization
|
|
145
|
+
mmJSON uses `column[1][2]` → schema uses `column12`
|
|
146
|
+
```python
|
|
147
|
+
from pdbminebuilder.parsers.mmjson import normalize_column_name
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Schema Access (SQLAlchemy Core)
|
|
151
|
+
Schema definitions use SQLAlchemy Core Table objects with metadata:
|
|
152
|
+
```python
|
|
153
|
+
from pdbminebuilder.models import get_metadata, ALL_METADATA
|
|
154
|
+
from pdbminebuilder.db.loader import get_table, get_all_tables, get_entry_pk
|
|
155
|
+
|
|
156
|
+
meta = get_metadata("cc") # MetaData with schema="cc"
|
|
157
|
+
table = get_table(meta, "brief_summary") # SA Table object
|
|
158
|
+
pk = get_entry_pk(meta) # "comp_id"
|
|
159
|
+
tables = get_all_tables(meta) # All tables in schema
|
|
160
|
+
|
|
161
|
+
# Schema/table config stored in .info dicts
|
|
162
|
+
meta.info["entry_pk"] # Schema-level primary key
|
|
163
|
+
table.info["keywords"] # Table-level keywords list
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Category Transformation
|
|
167
|
+
```python
|
|
168
|
+
from pdbminebuilder.pipelines.base import transform_category
|
|
169
|
+
|
|
170
|
+
# mmJSON: needs normalization
|
|
171
|
+
rows = transform_category(rows, table, pk_value, pk_col, normalize_column_name)
|
|
172
|
+
|
|
173
|
+
# CIF: no normalization needed (pass None)
|
|
174
|
+
rows = transform_category(rows, table, pk_value, pk_col, None)
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### Chemical SMILES Generation (cc pipeline)
|
|
178
|
+
Both CIF and mmJSON cc pipelines generate canonical SMILES using ccd2rdmol + RDKit.
|
|
179
|
+
This ensures consistent SMILES quality regardless of input format:
|
|
180
|
+
```python
|
|
181
|
+
from ccd2rdmol import read_ccd_block
|
|
182
|
+
|
|
183
|
+
block = gemmi.cif.read(cif_path)[0] # CIF
|
|
184
|
+
block = gemmi.cif.read_mmjson(json_path)[0] # mmJSON
|
|
185
|
+
|
|
186
|
+
result = read_ccd_block(block, sanitize_mol=True, add_conformers=False)
|
|
187
|
+
smiles = Chem.MolToSmiles(result.mol, canonical=True)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Note: The SMILES in `pdbx_chem_comp_descriptor` is NOT used. SMILES is always
|
|
191
|
+
generated from the molecular structure for consistency and quality.
|
|
192
|
+
|
|
193
|
+
### RDKit PostgreSQL Cartridge
|
|
194
|
+
Chemical searches use RDKit extension (auto-configured on `cc` pipeline run):
|
|
195
|
+
```sql
|
|
196
|
+
-- Substructure search
|
|
197
|
+
SELECT * FROM cc.brief_summary WHERE mol @> 'c1ccccc1'::mol;
|
|
198
|
+
|
|
199
|
+
-- Similarity search (Tanimoto)
|
|
200
|
+
SELECT *, tanimoto_sml(morganbv_fp(mol), morganbv_fp('CCO'::mol))
|
|
201
|
+
FROM cc.brief_summary WHERE morganbv_fp(mol) % morganbv_fp('CCO'::mol);
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Parallel Processing
|
|
205
|
+
- Workers create their own DB connections (they do not use the connection pool)
|
|
206
|
+
- `ProcessPoolExecutor` with configurable worker count
|
|
207
|
+
|
|
208
|
+
## Development
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
pixi run lint # ruff check
|
|
212
|
+
pixi run format # ruff format
|
|
213
|
+
pixi run test # pytest (all tests; use test-unit for unit tests only)
|
|
214
|
+
pixi run check # all checks (lint, format)
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Testing
|
|
218
|
+
|
|
219
|
+
**IMPORTANT**: Always start the Docker test DB before running the tests.
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
# 1. Start the test DB (PostgreSQL + RDKit)
|
|
223
|
+
pixi run test-db-up
|
|
224
|
+
|
|
225
|
+
# 2. Check the test DB status
|
|
226
|
+
pixi run test-db-status
|
|
227
|
+
|
|
228
|
+
# 3. Run the tests
|
|
229
|
+
pixi run test # All tests
|
|
230
|
+
pixi run test-unit # Unit tests only
|
|
231
|
+
pixi run test-integration # Integration tests (requires test DB)
|
|
232
|
+
|
|
233
|
+
# 4. Stop the test DB
|
|
234
|
+
pixi run test-db-down
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
Test DB details:
|
|
238
|
+
- Image: `mcs07/postgres-rdkit:latest` (PostgreSQL 17 + RDKit)
|
|
239
|
+
- Container: `pmb-postgres-test`
|
|
240
|
+
- Port: `15433`
|
|
241
|
+
- Database: `pmb_test`
|
|
242
|
+
- Config: `config.test.yml`
|
|
243
|
+
|
|
244
|
+
### Database Migrations (Alembic)
|
|
245
|
+
|
|
246
|
+
```bash
|
|
247
|
+
pixi run db-migrate "description" # Generate migration
|
|
248
|
+
pixi run db-upgrade # Apply all pending migrations
|
|
249
|
+
pixi run db-downgrade # Rollback last migration
|
|
250
|
+
pixi run db-history # Show migration history
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
Alembic is configured for multi-schema support (all 9 schemas).
|
|
254
|
+
Schema DDL is defined in `src/pdbminebuilder/models/` as SQLAlchemy Core Table objects.
|
|
255
|
+
Data operations still use psycopg3 direct connections (no SQLAlchemy Engine for data).
|
|
256
|
+
|
|
257
|
+
## Database
|
|
258
|
+
|
|
259
|
+
```bash
|
|
260
|
+
pixi run db-start # Start PostgreSQL
|
|
261
|
+
pixi run db-stop # Stop PostgreSQL
|
|
262
|
+
pixi run db-status # Show PostgreSQL status
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
Connection: set via `rdb.constring` in `config.yml`
|
|
266
|
+
|
|
267
|
+
### Bulk Load Mode
|
|
268
|
+
|
|
269
|
+
For initial data loading, use bulk load mode to significantly improve performance:
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
# 1. Start PostgreSQL
|
|
273
|
+
pixi run db-start
|
|
274
|
+
|
|
275
|
+
# 2. Enable bulk load mode (disables fsync, autovacuum)
|
|
276
|
+
pixi run db-bulkload-mode
|
|
277
|
+
|
|
278
|
+
# 3. Run data loading
|
|
279
|
+
pixi run pmb load cc
|
|
280
|
+
pixi run pmb load pdbj
|
|
281
|
+
# ... other pipelines
|
|
282
|
+
|
|
283
|
+
# 4. Restore safe settings
|
|
284
|
+
pixi run db-safe-mode
|
|
285
|
+
|
|
286
|
+
# 5. Run VACUUM ANALYZE
|
|
287
|
+
psql -c "VACUUM ANALYZE;"
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
**WARNING**: Bulk load mode disables crash safety. If PostgreSQL crashes during bulk load, discard the data directory, re-initialize the database, and reload from scratch:
|
|
291
|
+
```bash
|
|
292
|
+
pixi run db-stop
|
|
293
|
+
rm -rf $PGDATA
|
|
294
|
+
pixi run db-init
|
|
295
|
+
# Re-run data loading from scratch
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
## Configuration
|
|
299
|
+
|
|
300
|
+
- `config.yml` - Production config (customize locally)
|
|
301
|
+
- `config.test.yml` - Test config (uses fixture data with `${CWD}` expansion)
|
|
302
|
+
- `.env` - Environment variables (gitignored)
|
|
303
|
+
- `.env.example` - Template
|
|
304
|
+
|
|
305
|
+
## Known Issues
|
|
306
|
+
|
|
307
|
+
- Global connection pool is for main process only; workers use direct connections
|
|
308
|
+
- Workers receive `schema_name: str` and import models inside worker function (avoids pickling SA objects)
|
|
309
|
+
- `emdb` and `ihm` have schema definitions but no pipeline implementations yet
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 N283T
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|