autobidsify-0.5.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,65 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Virtual environments
venv/
env/
ENV/
.venv/
auto_bidsify/

# IDEs
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store

# Project specific - Data directories
datasets/
outputs/

# Project specific - Staging and temp
_staging/
*.log
*.tmp

# API keys and secrets
.env
.env.local
*.key
*.pem

# Jupyter
.ipynb_checkpoints/
*.ipynb

# Testing
.pytest_cache/
.coverage
htmlcov/

# Backups
backups/
*.backup
*.bak
@@ -0,0 +1,231 @@
Metadata-Version: 2.4
Name: autobidsify
Version: 0.5.0
Summary: Automated BIDS standardization tool powered by LLM-first architecture
Project-URL: Homepage, https://github.com/fangzhouliucode/autobidsify
Project-URL: Documentation, https://autobidsify.readthedocs.io
Project-URL: Repository, https://github.com/fangzhouliucode/autobidsify
Project-URL: Issues, https://github.com/fangzhouliucode/autobidsify/issues
Project-URL: Changelog, https://github.com/fangzhouliucode/autobidsify/blob/main/CHANGELOG.md
Author-email: Yiyi Liu <yiyi.liu3@northeastern.edu>
Maintainer-email: Yiyi Liu <yiyi.liu3@northeastern.edu>
License: MIT
Keywords: bids,brain-imaging,data-standardization,dicom,fnirs,medical-imaging,mri,neuroimaging,nifti
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Healthcare Industry
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
Classifier: Topic :: Scientific/Engineering :: Image Processing
Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
Requires-Python: >=3.10
Requires-Dist: h5py>=3.8.0
Requires-Dist: nibabel>=5.0.0
Requires-Dist: numpy>=1.24.0
Requires-Dist: openai>=1.0.0
Requires-Dist: openpyxl>=3.1.0
Requires-Dist: pandas>=2.0.0
Requires-Dist: pdfplumber>=0.10.0
Requires-Dist: pydicom>=2.4.0
Requires-Dist: pypdf2>=3.0.0
Requires-Dist: python-docx>=1.0.0
Requires-Dist: pyyaml>=6.0
Requires-Dist: scipy>=1.10.0
Provides-Extra: all
Requires-Dist: black>=23.0; extra == 'all'
Requires-Dist: mypy>=1.0; extra == 'all'
Requires-Dist: myst-parser>=2.0; extra == 'all'
Requires-Dist: pre-commit>=3.0; extra == 'all'
Requires-Dist: pytest-cov>=4.0; extra == 'all'
Requires-Dist: pytest>=7.0; extra == 'all'
Requires-Dist: python-dotenv>=1.0.0; extra == 'all'
Requires-Dist: ruff>=0.1.0; extra == 'all'
Requires-Dist: sphinx-rtd-theme>=1.0; extra == 'all'
Requires-Dist: sphinx>=6.0; extra == 'all'
Provides-Extra: dev
Requires-Dist: black>=23.0; extra == 'dev'
Requires-Dist: mypy>=1.0; extra == 'dev'
Requires-Dist: pre-commit>=3.0; extra == 'dev'
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
Requires-Dist: pytest>=7.0; extra == 'dev'
Requires-Dist: ruff>=0.1.0; extra == 'dev'
Provides-Extra: docs
Requires-Dist: myst-parser>=2.0; extra == 'docs'
Requires-Dist: sphinx-rtd-theme>=1.0; extra == 'docs'
Requires-Dist: sphinx>=6.0; extra == 'docs'
Provides-Extra: dotenv
Requires-Dist: python-dotenv>=1.0.0; extra == 'dotenv'
Description-Content-Type: text/markdown

# auto-bidsify

Automated BIDS standardization tool powered by LLM-first architecture.

## Features

- **General compatibility**: Handles diverse dataset structures (flat, hierarchical, multi-site)
- **Multi-modal support**: MRI, fNIRS, and mixed modality datasets
- **Intelligent metadata extraction**: Automatic participant demographics from DICOM headers, documents, and filenames
- **Format conversion**: DICOM→NIfTI, CSV→SNIRF, and more
- **Evidence-based reasoning**: Confidence scoring and provenance tracking for all decisions

## Supported Formats

**Input formats:**
- MRI: DICOM, NIfTI (.nii, .nii.gz)
- fNIRS: SNIRF, Homer3 (.nirs), CSV/TSV tables
- Documents: PDF, DOCX, TXT, Markdown, ...

**Output:** BIDS-compliant dataset (v1.10.0)

## Quick Start

### Installation

```bash
# Clone repository
git clone https://github.com/yourusername/auto-bidsify.git
cd auto-bidsify

# Setup environment
conda create -n bidsify python=3.10
conda activate bidsify
pip install -r requirements.txt

# Set OpenAI API key
export OPENAI_API_KEY="your-key-here"
```

### Basic Usage

```bash
# Full pipeline (one command)
python cli.py full \
    --input /path/to/your/data \
    --output outputs/my_dataset \
    --model gpt-4o \
    --modality mri

# Step-by-step execution
python cli.py ingest --input data.zip --output outputs/run
python cli.py evidence --output outputs/run --modality mri
python cli.py trio --output outputs/run --model gpt-4o
python cli.py plan --output outputs/run --model gpt-4o
python cli.py execute --output outputs/run
python cli.py validate --output outputs/run
```

### Command Options

```bash
--input PATH        Input data (archive or directory)
--output PATH       Output directory
--model MODEL       LLM model (default: gpt-4o)
--modality TYPE     Data modality: mri|nirs|mixed
--nsubjects N       Number of subjects (optional)
--describe "TEXT"   Dataset description (recommended)
```

## Pipeline Stages

| Stage | Command | Input | Output | Purpose |
|-------|-------------|-----------------|----------------------------|------------------------------------|
| 1 | `ingest` | Raw data | `ingest_info.json` | Extract/reference data |
| 2 | `evidence` | All files | `evidence_bundle.json` | Analyze structure, detect subjects |
| 3 | `classify` | Mixed data | `classification_plan.json` | Separate MRI/fNIRS (optional) |
| 4 | `trio` | Evidence | BIDS trio files | Generate metadata files |
| 5 | `plan` | Evidence + trio | `BIDSPlan.yaml` | Create conversion strategy |
| 6 | `execute` | Plan | `bids_compatible/` | Execute conversions |
| 7 | `validate` | BIDS dataset | Validation report | Check compliance |

## Output Structure

```
outputs/my_dataset/
    bids_compatible/              # Final BIDS dataset
        dataset_description.json
        README.md
        participants.tsv
        sub-001/
            anat/
                sub-001_T1w.nii.gz
            func/
                sub-001_task-rest_bold.nii.gz
    _staging/                     # Intermediate files
        evidence_bundle.json
        BIDSPlan.yaml
        conversion_log.json
```

## Examples

### Example 1: Single-site MRI study
```bash
python cli.py full \
    --input brain_scans/ \
    --output outputs/study1 \
    --nsubjects 50 \
    --model gpt-4o \
    --modality mri
```

### Example 2: Multi-site dataset with description
```bash
python cli.py full \
    --input camcan_data/ \
    --output outputs/camcan \
    --model gpt-4o \
    --modality mri \
    --describe "Cambridge Centre for Ageing and Neuroscience: 650 participants, ages 18-88, multi-site MRI study"
```

### Example 3: fNIRS dataset from CSV
```bash
python cli.py full \
    --input fnirs_study/ \
    --output outputs/fnirs \
    --model gpt-4o \
    --modality nirs \
    --describe "Prefrontal cortex activation during cognitive tasks, 30 subjects"
```

## Architecture

**LLM-First Design:**
- **Python**: Deterministic operations (file I/O, format conversion, validation)
- **LLM**: Semantic understanding (file classification, metadata extraction, pattern recognition)
- **Hybrid**: Best of both worlds - reliability + flexibility

## Requirements

- Python 3.10+
- OpenAI API key
- Optional: `dcm2niix` for DICOM conversion
- Optional: `bids-validator` for validation

## Current Status

**Version:** 1.0 (LLM-First Architecture with Evidence-Based Reasoning)

**Tested datasets:**
- Visible Human Project (flat structure, CT scans)
- CamCAN (hierarchical, multi-site, 1288 subjects)
- [Your dataset here - help us test!]

**Known limitations:**
- Classification stage (Stage 3) and mat/spreadsheet conversion are experimental
- Some edge cases in participant metadata extraction are not yet handled

## Contributing

We need YOUR datasets to improve robustness! Please test and report:
- Success cases
- Failure cases
- Edge cases

@@ -0,0 +1,166 @@
# auto-bidsify

Automated BIDS standardization tool powered by LLM-first architecture.

## Features

- **General compatibility**: Handles diverse dataset structures (flat, hierarchical, multi-site)
- **Multi-modal support**: MRI, fNIRS, and mixed modality datasets
- **Intelligent metadata extraction**: Automatic participant demographics from DICOM headers, documents, and filenames
- **Format conversion**: DICOM→NIfTI, CSV→SNIRF, and more
- **Evidence-based reasoning**: Confidence scoring and provenance tracking for all decisions

## Supported Formats

**Input formats:**
- MRI: DICOM, NIfTI (.nii, .nii.gz)
- fNIRS: SNIRF, Homer3 (.nirs), CSV/TSV tables
- Documents: PDF, DOCX, TXT, Markdown, ...

**Output:** BIDS-compliant dataset (v1.10.0)

## Quick Start

### Installation

```bash
# Clone repository
git clone https://github.com/yourusername/auto-bidsify.git
cd auto-bidsify

# Setup environment
conda create -n bidsify python=3.10
conda activate bidsify
pip install -r requirements.txt

# Set OpenAI API key
export OPENAI_API_KEY="your-key-here"
```

### Basic Usage

```bash
# Full pipeline (one command)
python cli.py full \
    --input /path/to/your/data \
    --output outputs/my_dataset \
    --model gpt-4o \
    --modality mri

# Step-by-step execution
python cli.py ingest --input data.zip --output outputs/run
python cli.py evidence --output outputs/run --modality mri
python cli.py trio --output outputs/run --model gpt-4o
python cli.py plan --output outputs/run --model gpt-4o
python cli.py execute --output outputs/run
python cli.py validate --output outputs/run
```

### Command Options

```bash
--input PATH        Input data (archive or directory)
--output PATH       Output directory
--model MODEL       LLM model (default: gpt-4o)
--modality TYPE     Data modality: mri|nirs|mixed
--nsubjects N       Number of subjects (optional)
--describe "TEXT"   Dataset description (recommended)
```

## Pipeline Stages

| Stage | Command | Input | Output | Purpose |
|-------|-------------|-----------------|----------------------------|------------------------------------|
| 1 | `ingest` | Raw data | `ingest_info.json` | Extract/reference data |
| 2 | `evidence` | All files | `evidence_bundle.json` | Analyze structure, detect subjects |
| 3 | `classify` | Mixed data | `classification_plan.json` | Separate MRI/fNIRS (optional) |
| 4 | `trio` | Evidence | BIDS trio files | Generate metadata files |
| 5 | `plan` | Evidence + trio | `BIDSPlan.yaml` | Create conversion strategy |
| 6 | `execute` | Plan | `bids_compatible/` | Execute conversions |
| 7 | `validate` | BIDS dataset | Validation report | Check compliance |

## Output Structure

```
outputs/my_dataset/
    bids_compatible/              # Final BIDS dataset
        dataset_description.json
        README.md
        participants.tsv
        sub-001/
            anat/
                sub-001_T1w.nii.gz
            func/
                sub-001_task-rest_bold.nii.gz
    _staging/                     # Intermediate files
        evidence_bundle.json
        BIDSPlan.yaml
        conversion_log.json
```

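The `sub-*` filenames above follow the BIDS entity convention, `sub-<label>[_task-<label>]_<suffix>.<extension>`. As a rough illustration of that naming scheme (this is not autobidsify's validator, just a simplified pattern covering the two file types shown):

```python
import re

# Simplified BIDS filename pattern for the anat/func files above:
# sub-<label>[_task-<label>]_<suffix>.nii[.gz]
BIDS_NAME = re.compile(
    r"^sub-[0-9a-zA-Z]+"          # subject entity, e.g. sub-001
    r"(?:_task-[0-9a-zA-Z]+)?"    # optional task entity, e.g. _task-rest
    r"_(?:T1w|bold)"              # suffix: anatomical T1w or functional bold
    r"\.nii(?:\.gz)?$"            # NIfTI extension, optionally gzipped
)

def is_bids_name(name: str) -> bool:
    """Return True if a filename matches the simplified BIDS pattern."""
    return bool(BIDS_NAME.match(name))
```

Both example files match: `is_bids_name("sub-001_T1w.nii.gz")` and `is_bids_name("sub-001_task-rest_bold.nii.gz")` are true, while sidecar files like `participants.tsv` are not.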
101
## Examples

### Example 1: Single-site MRI study
```bash
python cli.py full \
    --input brain_scans/ \
    --output outputs/study1 \
    --nsubjects 50 \
    --model gpt-4o \
    --modality mri
```

### Example 2: Multi-site dataset with description
```bash
python cli.py full \
    --input camcan_data/ \
    --output outputs/camcan \
    --model gpt-4o \
    --modality mri \
    --describe "Cambridge Centre for Ageing and Neuroscience: 650 participants, ages 18-88, multi-site MRI study"
```

### Example 3: fNIRS dataset from CSV
```bash
python cli.py full \
    --input fnirs_study/ \
    --output outputs/fnirs \
    --model gpt-4o \
    --modality nirs \
    --describe "Prefrontal cortex activation during cognitive tasks, 30 subjects"
```

## Architecture

**LLM-First Design:**
- **Python**: Deterministic operations (file I/O, format conversion, validation)
- **LLM**: Semantic understanding (file classification, metadata extraction, pattern recognition)
- **Hybrid**: Best of both worlds - reliability + flexibility

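One way to picture this split is a thin routing layer: deterministic rules settle whatever a file extension already decides, and only ambiguous files are escalated to the model. The sketch below is illustrative, not autobidsify's actual code; `llm_classify` is a hypothetical stand-in for whatever model call the tool makes.

```python
from pathlib import Path

# Extensions deterministic code can classify without any model call.
RULES = {".dcm": "mri", ".nii": "mri", ".snirf": "nirs", ".nirs": "nirs"}

def classify_file(path: str, llm_classify=None) -> str:
    """Rule-based classification first; LLM fallback for ambiguous files."""
    suffix = Path(path).suffix.lower()
    if suffix in RULES:
        return RULES[suffix]           # Python: deterministic path
    if llm_classify is not None:
        return llm_classify(path)      # LLM: semantic path (e.g. CSV tables)
    return "unknown"
```

`classify_file("scan.dcm")` resolves to `"mri"` without touching the model; a bare `.csv` table, which could be fNIRS data or demographics, is the kind of case that gets routed through `llm_classify`.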
140
## Requirements

- Python 3.10+
- OpenAI API key
- Optional: `dcm2niix` for DICOM conversion
- Optional: `bids-validator` for validation

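Because `dcm2niix` and `bids-validator` are optional external tools, a quick PATH check before a long run can save a failed conversion at the execute stage. A small sketch (an assumed helper, not part of the CLI):

```python
import shutil

def missing_optional_tools(tools=("dcm2niix", "bids-validator")):
    """Return the optional external tools that are not found on PATH."""
    return [tool for tool in tools if shutil.which(tool) is None]
```

An empty list means both converters are available; anything returned will need to be installed before the corresponding stage can use it.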
147
## Current Status

**Version:** 1.0 (LLM-First Architecture with Evidence-Based Reasoning)

**Tested datasets:**
- Visible Human Project (flat structure, CT scans)
- CamCAN (hierarchical, multi-site, 1288 subjects)
- [Your dataset here - help us test!]

**Known limitations:**
- Classification stage (Stage 3) and mat/spreadsheet conversion are experimental
- Some edge cases in participant metadata extraction are not yet handled

## Contributing

We need YOUR datasets to improve robustness! Please test and report:
- Success cases
- Failure cases
- Edge cases

@@ -0,0 +1,20 @@
"""
autobidsify: Automated BIDS standardization tool powered by LLM-first architecture.
"""

__version__ = "1.0.0"
__author__ = "Your Name"

from autobidsify.utils import info, warn, fatal, debug
from autobidsify.constants import BIDS_VERSION, MODALITY_MRI, MODALITY_NIRS

__all__ = [
    "__version__",
    "info",
    "warn",
    "fatal",
    "debug",
    "BIDS_VERSION",
    "MODALITY_MRI",
    "MODALITY_NIRS",
]