PyPI - sdg-hub - Versions diffs - 0.1.0a3__tar.gz → 0.1.1__tar.gz - Mend

sdg-hub 0.1.0a3__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (292) hide show

{sdg_hub-0.1.0a3 → sdg_hub-0.1.1}/.github/workflows/actionlint.dockerfile RENAMED Viewed

@@ -1,3 +1,3 @@
 # Since dependabot cannot update workflows using docker,
 # we use this indirection since dependabot can update this file.
-FROM rhysd/actionlint:1.7.1@sha256:435ecdb63b1169e80ca3e136290072548c07fc4d76a044cf5541021712f8f344
+FROM rhysd/actionlint:1.7.7@sha256:887a259a5a534f3c4f36cb02dca341673c6089431057242cdc931e9f133147e9

{sdg_hub-0.1.0a3 → sdg_hub-0.1.1}/.github/workflows/actionlint.yml RENAMED Viewed

@@ -30,14 +30,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: "Harden Runner"
-        uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf # v2.11.1
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
         with:
           egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
       - name: "Checkout"
         uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-        with:
-          fetch-depth: 0
       - name: "Download actionlint"
         run: |

{sdg_hub-0.1.0a3 → sdg_hub-0.1.1}/.github/workflows/docs.yml RENAMED Viewed

@@ -33,14 +33,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: "Harden Runner"
-        uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf # v2.11.1
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
         with:
           egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
       - name: "Checkout"
         uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-        with:
-          fetch-depth: 0
       - name: "Check Markdown documents"
-        uses: DavidAnson/markdownlint-cli2-action@b4c9feab76d8025d1e83c653fa3990936df0e6c8 # v16.0.0
+        uses: DavidAnson/markdownlint-cli2-action@992badcdf24e3b8eb7e87ff9287fe931bcb00c6e # v20.0.0
         with:
           globs: '**/*.md'

{sdg_hub-0.1.0a3 → sdg_hub-0.1.1}/.github/workflows/e2e.yml RENAMED Viewed

@@ -51,7 +51,7 @@ jobs:
       - name: Install Packages
         run: |
-          sudo apt-get install -y cuda-toolkit git cmake build-essential virtualenv
+          sudo apt-get install -y cuda-toolkit git cmake build-essential
           nvidia-smi
           sudo ls -l /dev/nvidia*
@@ -75,7 +75,7 @@ jobs:
           # config contains DEFAULT_MODEL
           key: huggingface-${{ hashFiles('src/instructlab/configuration.py') }}
-      - name: Install instructlab and instructlab-sdg
+      - name: Install instructlab and sdg_hub
         run: |
           export PATH="/home/runner/.local/bin:/usr/local/cuda/bin:$PATH"
           python3 -m venv venv
@@ -89,7 +89,7 @@ jobs:
           # install instructlab
           python3 -m pip install .
           cd ..
-          # Install instructlab-sdg
+          # Install sdg_hub
           python3 -m pip install .
       - name: Run e2e test

{sdg_hub-0.1.0a3 → sdg_hub-0.1.1}/.github/workflows/pypi.yaml RENAMED Viewed

@@ -37,7 +37,7 @@ jobs:
         runs-on: ubuntu-latest
         steps:
             - name: "Harden Runner"
-              uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf # v2.11.1
+              uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
               with:
                   egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
@@ -49,7 +49,7 @@ jobs:
                   fetch-depth: 0
             - name: "Build and Inspect"
-              uses: hynek/build-and-inspect-python-package@b5076c307dc91924a82ad150cdd1533b444d3310 # v2.12.0
+              uses: hynek/build-and-inspect-python-package@c52c3a4710070b50470d903818a7b25115dcd076 # v2.13.0
     # push to Test PyPI on
     # - a new GitHub release is published
@@ -67,12 +67,12 @@ jobs:
         steps:
             - name: "Harden Runner"
-              uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf # v2.11.1
+              uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
               with:
                   egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
             - name: "Download build artifacts"
-              uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
+              uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
               with:
                   name: Packages
                   path: dist
@@ -99,12 +99,12 @@ jobs:
         steps:
             - name: "Harden Runner"
-              uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf # v2.11.1
+              uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
               with:
                   egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
             - name: "Download build artifacts"
-              uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
+              uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
               with:
                   name: Packages
                   path: dist

{sdg_hub-0.1.0a3 → sdg_hub-0.1.1}/.github/workflows/test.yml RENAMED Viewed

@@ -51,7 +51,7 @@ jobs:
             platform: "macos-latest"
     steps:
       - name: "Harden Runner"
-        uses: step-security/harden-runner@c6295a65d1254861815972266d5933fd6e532bdf # v2.11.1
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
         with:
           egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
@@ -102,12 +102,19 @@ jobs:
       - name: Run unit tests with tox
         run: |
-          tox
+          tox -e py3-unitcov
       - name: Remove llama-cpp-python from cache
         if: always()
         run: |
           pip cache remove llama_cpp_python
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v4
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          file: ./coverage.xml
+          fail_ci_if_error: true
   test-workflow-complete:
     needs: ["test"]

{sdg_hub-0.1.0a3 → sdg_hub-0.1.1}/.gitignore RENAMED Viewed

@@ -117,6 +117,14 @@ ipython_config.py
 .pdm-python
 .pdm-build/
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+uv.lock
+.uv_cache/
+.python-version
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
@@ -135,6 +143,7 @@ venv/
 ENV/
 env.bak/
 venv.bak/
+sdg_env/
 dictionary.dic
 # Spyder project settings

{sdg_hub-0.1.0a3 → sdg_hub-0.1.1}/.markdownlint-cli2.yaml RENAMED Viewed

@@ -17,3 +17,5 @@ ignores:
   - "venv/**"
   - ".venv/**"
   - ".tox/**"
+  - "examples/**"
+  - "!examples/**/README.md"

sdg_hub-0.1.1/CLAUDE.md ADDED Viewed

@@ -0,0 +1,100 @@
+# CLAUDE.md
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+## Overview
+SDG Hub is a modular synthetic data generation toolkit for LLMs. The framework is built around YAML-configured flows that chain computational blocks together to process and generate data.
+## Development Commands
+### Code Style
+- Use numpy style docstrings
+- All functions and methods must include python type hints
+- Write ruff-compliant code
+### Testing
+- Run all tests: `pytest tests/`
+- Run specific test: `pytest tests/test_filename.py`
+- Run tests with coverage: `tox -e py3-unitcov`
+### Linting and Code Quality
+- Format code: `tox -e ruff fix` or `./scripts/ruff.sh fix`
+- Check code formatting: `tox -e ruff check`
+- Run linting: `tox -e lint` (full pylint) or `tox -e fastlint` (faster)
+- Type checking: `tox -e mypy`
+- Run all checks: `make verify` (runs fastlint, mypy, ruff via tox)
+### Build and Install
+- Install for development: `pip install -e .[dev]`
+- Install with web interface: `pip install -e .[web_interface]`
+- Install with examples dependencies: `pip install -e .[examples]`
+### Git Workflow
+- **IMPORTANT**: Always create a feature branch and never push directly to main
+- **Use git worktrees for local development**: `git worktree add ../feature-branch-name feature-branch-name`
+- Create branch: `git checkout -b feature-branch-name`
+- Push to branch: `git push origin feature-branch-name`
+## Architecture
+### Core Components
+1. **Blocks** (`src/sdg_hub/blocks/`): Fundamental computational units
+   - `Block`: Abstract base class for all blocks
+   - `LLMBlock`: Language model generation blocks
+   - Utility blocks: filtering, data transformation, column operations
+2. **Flows** (`src/sdg_hub/flow.py`): Orchestrates blocks in YAML-defined pipelines
+   - Loads YAML configurations
+   - Manages block execution order
+   - Handles data flow between blocks
+3. **Registry System** (`src/sdg_hub/registry.py`):
+   - `BlockRegistry`: Manages available block types
+   - `PromptRegistry`: Manages prompt configurations
+4. **Prompts** (`src/sdg_hub/configs/`): YAML-based LLM instruction templates
+   - Support Jinja2 templating with variable injection
+   - Include system instructions, principles, examples, and generation templates
+### Data Flow
+- Uses Hugging Face Datasets (Arrow tables) for data representation
+- Supports checkpointing for long-running flows
+- Blocks process datasets and pass results to subsequent blocks
+### Flow Configuration
+Flows are defined in YAML files with this structure:
+```yaml
+- block_type: LLMBlock
+  block_config:
+    block_name: unique_name
+    config_path: path/to/prompt.yaml
+    model_id: model_name
+    output_cols: [column_names]
+  gen_kwargs:
+    max_tokens: 512
+```
+### Block Development
+When creating new blocks:
+1. Inherit from `Block` base class
+2. Register with `@BlockRegistry.register("BlockName")`
+3. Implement `generate()` method
+4. Use `_validate()` for input validation
+5. Use `_load_config()` for YAML configuration loading
+### Testing Conventions
+- Unit tests in `tests/` directory
+- Test data in `testdata/` subdirectories
+- Use pytest fixtures for common test setup
+- Test both positive and negative cases
+- Include edge cases and error conditions
+## Additional Tips
+- Use `rg` in favor of `grep` whenever it's available
+- Use `uv` for Python environment management: always start with `uv sync --extra dev` to init the env and run stuff with `uv run`

sdg_hub-0.1.1/CONTRIBUTING.md ADDED Viewed

@@ -0,0 +1,30 @@
+# Contributing to SDG Hub
+This is a guide for getting started on contributing to SDG Hub.
+## Dev Requirements
+Install the development dependencies using the optional `dev` group:
+```bash
+pip install .[dev]
+```
+If you’re using a fresh virtual environment, this will install both the core and development requirements declared in `pyproject.toml`.
+## Linting
+SDG Hub uses a Makefile for linting.
+- CI changes should pass the Action linter - you can run this via `make actionlint`
+- Docs changes should pass the Markdown linter - you can run this via `make md-lint`
+- Code changes should pass the Code linter - you can run this via `make verify`
+## Testing
+SDG Hub uses [tox](https://tox.wiki/) for test automation and [pytest](https://docs.pytest.org/) as a test framework.
+You can run all tests by simply running the `tox -e py3-unit` command.

sdg_hub-0.1.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,190 @@
+Metadata-Version: 2.4
+Name: sdg_hub
+Version: 0.1.1
+Summary: Synthetic Data Generation
+Author-email: Red Hat AI Innovation <abhandwa@redhat.com>
+License: Apache-2.0
+Project-URL: homepage, https://ai-innovation.team/
+Project-URL: source, https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub
+Project-URL: issues, https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/issues
+Classifier: Development Status :: 3 - Alpha
+Classifier: Environment :: Console
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: MacOS :: MacOS X
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: click<9.0.0,>=8.1.7
+Requires-Dist: datasets<4.0.0,>=2.18.0
+Requires-Dist: httpx<1.0.0,>=0.25.0
+Requires-Dist: jinja2
+Requires-Dist: openai<2.0.0,>=1.13.3
+Requires-Dist: rich
+Requires-Dist: tenacity!=8.4.0,>=8.3.0
+Requires-Dist: tqdm<5.0.0,>=4.66.2
+Provides-Extra: web-interface
+Requires-Dist: flask>=3.0.2; extra == "web-interface"
+Requires-Dist: pyyaml>=6.0.1; extra == "web-interface"
+Requires-Dist: flask-wtf>=1.2.2; extra == "web-interface"
+Provides-Extra: vllm
+Requires-Dist: vllm<0.8.4,>=0.8.0; extra == "vllm"
+Requires-Dist: torch>=2.0.0; extra == "vllm"
+Requires-Dist: transformers>=4.37.0; extra == "vllm"
+Requires-Dist: accelerate>=0.21.0; extra == "vllm"
+Requires-Dist: xformers>=0.0.22.post7; extra == "vllm"
+Provides-Extra: examples
+Requires-Dist: tabulate>=0.9.0; extra == "examples"
+Requires-Dist: transformers>=4.37.0; extra == "examples"
+Requires-Dist: langchain-text-splitters; extra == "examples"
+Requires-Dist: docling>=2.3.0; extra == "examples"
+Provides-Extra: dev
+Requires-Dist: pre-commit<4.0,>=3.0.4; extra == "dev"
+Requires-Dist: pylint<4.0,>=2.16.2; extra == "dev"
+Requires-Dist: pylint-pydantic; extra == "dev"
+Requires-Dist: pytest; extra == "dev"
+Requires-Dist: pytest-asyncio; extra == "dev"
+Requires-Dist: pytest-cov; extra == "dev"
+Requires-Dist: pytest-html; extra == "dev"
+Requires-Dist: tox<5,>=4.4.2; extra == "dev"
+Dynamic: license-file
+# SDG Hub: Synthetic Data Generation Toolkit
+[![Build](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/actions/workflows/pypi.yaml/badge.svg?branch=main)](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/actions/workflows/pypi.yaml)
+[![Release](https://img.shields.io/github/v/release/Red-Hat-AI-Innovation-Team/sdg_hub)](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/releases)
+[![License](https://img.shields.io/github/license/Red-Hat-AI-Innovation-Team/sdg_hub)](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/blob/main/LICENSE)
+[![Tests](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/actions/workflows/test.yml/badge.svg)](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/actions/workflows/test.yml)
+[![codecov](https://codecov.io/gh/Red-Hat-AI-Innovation-Team/sdg_hub/graph/badge.svg?token=SP75BCXWO2)](https://codecov.io/gh/Red-Hat-AI-Innovation-Team/sdg_hub)
+<html>
+    <h3 align="center">
+      A modular, scalable, and efficient solution for creating synthetic data generation flows in a "low-code" manner.
+    </h3>
+    <h3 align="center">
+      <a href="https://ai-innovation.team/sdg_hub">Documentation</a> |
+      <a href="examples/">Examples</a> |
+      <a href="https://www.youtube.com/watch?v=aGKCViWjAmA">Video Tutorial</a>
+    </h3>
+</html>
+SDG Hub is designed to simplify data creation for LLMs, allowing users to chain computational units and build powerful flows for generating data and processing tasks. Define complex workflows using nothing but YAML configuration files.
+**📖 Full documentation available at: [https://ai-innovation.team/sdg_hub](https://ai-innovation.team/sdg_hub)**
+---
+## ✨ Key Features
+- **Low-Code Flow Creation**: Build sophisticated data generation pipelines using
+  simple YAML configuration files without writing any code.
+- **Modular Block System**: Compose workflows from reusable, self-contained
+  blocks that handle LLM calls, data transformations, and filtering.
+- **LLM-Agnostic**: Works with any language model through configurable
+  prompt templates and generation parameters.
+- **Prompt Engineering Friendly**: Tune LLM behavior by editing declarative YAML prompts.
+## 🚀 Installation
+### Stable Release (Recommended)
+```bash
+pip install sdg-hub
+```
+### Development Version
+```bash
+pip install git+https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub.git
+```
+## 🏁 Quick Start
+### Prerequisites
+Before getting started, make sure you have:
+- Python 3.9 or higher
+- LLM Inference Endpoint exposed through OpenAI API
+### Simple Example
+Here's the simplest way to get started:
+```python
+from sdg_hub.flow_runner import run_flow
+# Run a basic knowledge generation flow
+run_flow(
+    ds_path="my_data.jsonl",
+    save_path="output.jsonl",
+    endpoint="http://0.0.0.0:8000/v1",
+    flow_path="flows/generation/knowledge/synth_knowledge.yaml"
+)
+```
+### Advanced Configuration
+You can invoke any built-in flow using run_flow:
+```python
+from sdg_hub.flow_runner import run_flow
+run_flow(
+    ds_path="path/to/dataset.jsonl",
+    save_path="path/to/output.jsonl",
+    endpoint="http://0.0.0.0:8000/v1",
+    flow_path="path/to/flow.yaml",
+    checkpoint_dir="path/to/checkpoints",
+    batch_size=8,
+    num_workers=32,
+    save_freq=2,
+)
+```
+### 📂 Available Built-in Flows
+You can start with any of these YAML flows out of the box:
+#### 🔎 **Knowledge Flows**
+| Flow | Description |
+|------|-------------|
+| `synth_knowledge.yaml` | Produces document-grounded questions and answers for factual memorization |
+| `synth_knowledge1.5.yaml` | Improved version that builds intermediate representations for better recall |
+#### 🧠 **Skills Flows**
+| Flow | Description |
+|------|-------------|
+| `synth_skills.yaml` | Freeform skills QA generation (e.g., "Create a new GitHub issue to add type hints") |
+| `synth_grounded_skills.yaml` | Domain-specific skill generation (e.g., "From the given conversation, create a table for feature requests") |
+| `improve_responses.yaml` | Uses planning and critique-based refinement to improve generated answers |
+All these can be found here: [flows](src/sdg_hub/flows)
+## 📺 Video Tutorial
+For a comprehensive walkthrough of sdg_hub:
+[![SDG Hub Tutorial](https://img.youtube.com/vi/aGKCViWjAmA/0.jpg)](https://www.youtube.com/watch?v=aGKCViWjAmA)
+## 🤝 Contributing
+We welcome contributions from the community! Whether it's bug reports, feature requests, documentation improvements, or code contributions, please check out our [contribution guidelines](CONTRIBUTING.md).
+## 📄 License
+This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
+---
+Built with ❤️ by the Red Hat AI Innovation Team

sdg_hub-0.1.1/README.md ADDED Viewed

@@ -0,0 +1,131 @@
+# SDG Hub: Synthetic Data Generation Toolkit
+[![Build](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/actions/workflows/pypi.yaml/badge.svg?branch=main)](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/actions/workflows/pypi.yaml)
+[![Release](https://img.shields.io/github/v/release/Red-Hat-AI-Innovation-Team/sdg_hub)](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/releases)
+[![License](https://img.shields.io/github/license/Red-Hat-AI-Innovation-Team/sdg_hub)](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/blob/main/LICENSE)
+[![Tests](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/actions/workflows/test.yml/badge.svg)](https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub/actions/workflows/test.yml)
+[![codecov](https://codecov.io/gh/Red-Hat-AI-Innovation-Team/sdg_hub/graph/badge.svg?token=SP75BCXWO2)](https://codecov.io/gh/Red-Hat-AI-Innovation-Team/sdg_hub)
+<html>
+    <h3 align="center">
+      A modular, scalable, and efficient solution for creating synthetic data generation flows in a "low-code" manner.
+    </h3>
+    <h3 align="center">
+      <a href="https://ai-innovation.team/sdg_hub">Documentation</a> |
+      <a href="examples/">Examples</a> |
+      <a href="https://www.youtube.com/watch?v=aGKCViWjAmA">Video Tutorial</a>
+    </h3>
+</html>
+SDG Hub is designed to simplify data creation for LLMs, allowing users to chain computational units and build powerful flows for generating data and processing tasks. Define complex workflows using nothing but YAML configuration files.
+**📖 Full documentation available at: [https://ai-innovation.team/sdg_hub](https://ai-innovation.team/sdg_hub)**
+---
+## ✨ Key Features
+- **Low-Code Flow Creation**: Build sophisticated data generation pipelines using
+  simple YAML configuration files without writing any code.
+- **Modular Block System**: Compose workflows from reusable, self-contained
+  blocks that handle LLM calls, data transformations, and filtering.
+- **LLM-Agnostic**: Works with any language model through configurable
+  prompt templates and generation parameters.
+- **Prompt Engineering Friendly**: Tune LLM behavior by editing declarative YAML prompts.
+## 🚀 Installation
+### Stable Release (Recommended)
+```bash
+pip install sdg-hub
+```
+### Development Version
+```bash
+pip install git+https://github.com/Red-Hat-AI-Innovation-Team/sdg_hub.git
+```
+## 🏁 Quick Start
+### Prerequisites
+Before getting started, make sure you have:
+- Python 3.9 or higher
+- LLM Inference Endpoint exposed through OpenAI API
+### Simple Example
+Here's the simplest way to get started:
+```python
+from sdg_hub.flow_runner import run_flow
+# Run a basic knowledge generation flow
+run_flow(
+    ds_path="my_data.jsonl",
+    save_path="output.jsonl",
+    endpoint="http://0.0.0.0:8000/v1",
+    flow_path="flows/generation/knowledge/synth_knowledge.yaml"
+)
+```
+### Advanced Configuration
+You can invoke any built-in flow using run_flow:
+```python
+from sdg_hub.flow_runner import run_flow
+run_flow(
+    ds_path="path/to/dataset.jsonl",
+    save_path="path/to/output.jsonl",
+    endpoint="http://0.0.0.0:8000/v1",
+    flow_path="path/to/flow.yaml",
+    checkpoint_dir="path/to/checkpoints",
+    batch_size=8,
+    num_workers=32,
+    save_freq=2,
+)
+```
+### 📂 Available Built-in Flows
+You can start with any of these YAML flows out of the box:
+#### 🔎 **Knowledge Flows**
+| Flow | Description |
+|------|-------------|
+| `synth_knowledge.yaml` | Produces document-grounded questions and answers for factual memorization |
+| `synth_knowledge1.5.yaml` | Improved version that builds intermediate representations for better recall |
+#### 🧠 **Skills Flows**
+| Flow | Description |
+|------|-------------|
+| `synth_skills.yaml` | Freeform skills QA generation (e.g., "Create a new GitHub issue to add type hints") |
+| `synth_grounded_skills.yaml` | Domain-specific skill generation (e.g., "From the given conversation, create a table for feature requests") |
+| `improve_responses.yaml` | Uses planning and critique-based refinement to improve generated answers |
+All these can be found here: [flows](src/sdg_hub/flows)
+## 📺 Video Tutorial
+For a comprehensive walkthrough of sdg_hub:
+[![SDG Hub Tutorial](https://img.youtube.com/vi/aGKCViWjAmA/0.jpg)](https://www.youtube.com/watch?v=aGKCViWjAmA)
+## 🤝 Contributing
+We welcome contributions from the community! Whether it's bug reports, feature requests, documentation improvements, or code contributions, please check out our [contribution guidelines](CONTRIBUTING.md).
+## 📄 License
+This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
+---
+Built with ❤️ by the Red Hat AI Innovation Team

sdg_hub-0.1.1/assets/imgs/fig-workflow.png ADDED Viewed

Binary file

sdg-hub 0.1.0a3__tar.gz → 0.1.1__tar.gz

sdg-hub 0.1.0a3__tar.gz → 0.1.1__tar.gz