PyPI - docpipe-sdk - Versions diffs - 0.1.0__tar.gz - Mend

docpipe-sdk 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

docpipe_sdk-0.1.0/.github/dependabot.yml +35 -0
docpipe_sdk-0.1.0/.github/workflows/ci.yml +35 -0
docpipe_sdk-0.1.0/.github/workflows/publish.yml +48 -0
docpipe_sdk-0.1.0/.gitignore +18 -0
docpipe_sdk-0.1.0/CHANGELOG.md +27 -0
docpipe_sdk-0.1.0/Dockerfile +19 -0
docpipe_sdk-0.1.0/LICENSE +21 -0
docpipe_sdk-0.1.0/PKG-INFO +170 -0
docpipe_sdk-0.1.0/README.md +102 -0
docpipe_sdk-0.1.0/docpipe.example.yaml +29 -0
docpipe_sdk-0.1.0/pyproject.toml +100 -0
docpipe_sdk-0.1.0/scripts/release.sh +32 -0
docpipe_sdk-0.1.0/site/index.html +586 -0
docpipe_sdk-0.1.0/site/vercel.json +15 -0
docpipe_sdk-0.1.0/src/docpipe/__init__.py +150 -0
docpipe_sdk-0.1.0/src/docpipe/_version.py +1 -0
docpipe_sdk-0.1.0/src/docpipe/cli/__init__.py +0 -0
docpipe_sdk-0.1.0/src/docpipe/cli/main.py +308 -0
docpipe_sdk-0.1.0/src/docpipe/config/__init__.py +0 -0
docpipe_sdk-0.1.0/src/docpipe/config/loader.py +54 -0
docpipe_sdk-0.1.0/src/docpipe/config/settings.py +41 -0
docpipe_sdk-0.1.0/src/docpipe/core/__init__.py +0 -0
docpipe_sdk-0.1.0/src/docpipe/core/errors.py +41 -0
docpipe_sdk-0.1.0/src/docpipe/core/extractor.py +37 -0
docpipe_sdk-0.1.0/src/docpipe/core/parser.py +36 -0
docpipe_sdk-0.1.0/src/docpipe/core/pipeline.py +137 -0
docpipe_sdk-0.1.0/src/docpipe/core/types.py +106 -0
docpipe_sdk-0.1.0/src/docpipe/extractors/__init__.py +0 -0
docpipe_sdk-0.1.0/src/docpipe/extractors/langchain_extractor.py +164 -0
docpipe_sdk-0.1.0/src/docpipe/extractors/langextract_extractor.py +106 -0
docpipe_sdk-0.1.0/src/docpipe/ingestion/__init__.py +0 -0
docpipe_sdk-0.1.0/src/docpipe/ingestion/pipeline.py +206 -0
docpipe_sdk-0.1.0/src/docpipe/parsers/__init__.py +0 -0
docpipe_sdk-0.1.0/src/docpipe/parsers/docling_parser.py +136 -0
docpipe_sdk-0.1.0/src/docpipe/py.typed +0 -0
docpipe_sdk-0.1.0/src/docpipe/registry/__init__.py +0 -0
docpipe_sdk-0.1.0/src/docpipe/registry/registry.py +120 -0
docpipe_sdk-0.1.0/src/docpipe/server/__init__.py +0 -0
docpipe_sdk-0.1.0/src/docpipe/server/app.py +239 -0
docpipe_sdk-0.1.0/tests/__init__.py +0 -0
docpipe_sdk-0.1.0/tests/conftest.py +117 -0
docpipe_sdk-0.1.0/tests/integration/__init__.py +0 -0
docpipe_sdk-0.1.0/tests/unit/__init__.py +0 -0
docpipe_sdk-0.1.0/tests/unit/test_config.py +54 -0
docpipe_sdk-0.1.0/tests/unit/test_ingestion.py +77 -0
docpipe_sdk-0.1.0/tests/unit/test_pipeline.py +63 -0
docpipe_sdk-0.1.0/tests/unit/test_registry.py +95 -0
docpipe_sdk-0.1.0/tests/unit/test_types.py +128 -0

docpipe_sdk-0.1.0/.github/dependabot.yml ADDED Viewed

@@ -0,0 +1,35 @@
+# Dependabot configuration: weekly update checks for the project's Python
+# dependencies and for the GitHub Actions used by its workflows.
+version: 2
+updates:
+  # Python dependencies
+  - package-ecosystem: pip
+    directory: "/"
+    schedule:
+      interval: weekly
+      day: monday
+    open-pull-requests-limit: 10
+    labels:
+      - "dependencies"
+      - "python"
+    # Related packages are grouped so their updates are proposed together.
+    groups:
+      langchain:
+        patterns:
+          - "langchain-*"
+        # Only minor and patch bumps are grouped for langchain-* packages.
+        update-types:
+          - "minor"
+          - "patch"
+      docling:
+        patterns:
+          - "docling*"
+      langextract:
+        patterns:
+          - "langextract*"
+  # GitHub Actions
+  - package-ecosystem: github-actions
+    directory: "/"
+    schedule:
+      interval: weekly
+    labels:
+      - "dependencies"
+      - "ci"

docpipe_sdk-0.1.0/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,35 @@
+# Continuous integration: lint, unit tests and type check on pushes and pull
+# requests targeting main.
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # One job per interpreter version listed in the pyproject.toml classifiers.
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
+    steps:
+      - uses: actions/checkout@v6
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.python-version }}
+      # The "dev" extra supplies pytest, ruff and mypy used by the steps below.
+      - name: Install dependencies
+        run: pip install -e ".[dev]"
+      - name: Lint with ruff
+        run: ruff check src/
+      # Only tests/unit/ is run here; tests/integration/ is not part of this job.
+      - name: Run unit tests
+        run: pytest tests/unit/ -v --tb=short
+      # Type checking is advisory: continue-on-error keeps the job green even
+      # when mypy reports errors.
+      - name: Check types with mypy
+        run: mypy src/docpipe/ --ignore-missing-imports
+        continue-on-error: true

docpipe_sdk-0.1.0/.github/workflows/publish.yml ADDED Viewed

@@ -0,0 +1,48 @@
+# Builds the sdist and wheel when a "v*" tag is pushed, then uploads them to
+# PyPI. The publish action is invoked without a password input and relies on
+# the OIDC token (PyPI Trusted Publishing).
+name: Publish to PyPI
+on:
+  push:
+    tags:
+      - "v*"
+# Least privilege by default: the build job runs the project's build backend
+# and only needs to read the repository. The OIDC permission is granted solely
+# to the publish job below.
+permissions:
+  contents: read
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+      - name: Install build tools
+        run: pip install build
+      - name: Build package
+        run: python -m build
+      # Hand the built distributions over to the publish job.
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v7
+        with:
+          name: dist
+          path: dist/
+  publish:
+    needs: build
+    runs-on: ubuntu-latest
+    environment: pypi
+    permissions:
+      # Required so the publish action can request an OIDC token.
+      id-token: write
+    steps:
+      - name: Download artifacts
+        uses: actions/download-artifact@v8
+        with:
+          name: dist
+          path: dist/
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

docpipe_sdk-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,18 @@
+__pycache__/
+*.py[cod]
+*$py.class
+*.egg-info/
+dist/
+build/
+.eggs/
+*.egg
+.venv/
+venv/
+.env
+*.so
+.mypy_cache/
+.pytest_cache/
+.ruff_cache/
+htmlcov/
+.coverage
+*.log

docpipe_sdk-0.1.0/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,27 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [Unreleased]
+## [0.1.0] - 2026-04-04
+### Added
+- Core pipeline architecture with Protocol-based parser and extractor interfaces
+- Docling parser adapter for document parsing (PDF, DOCX, images, audio, video)
+- LangExtract extractor adapter for LLM-based structured extraction
+- LangChain extractor adapter using `with_structured_output()`
+- Ingestion pipeline with LangChain text splitters, embeddings, and PGVector
+- Plugin registry with `importlib.metadata` entry-point auto-discovery
+- Configuration via Pydantic Settings (env vars + YAML files)
+- CLI commands: `parse`, `extract`, `run`, `ingest`, `search`, `serve`, `plugins`, `config`
+- FastAPI server with REST endpoints for all pipeline operations
+- Dockerfile for containerized deployment
+- 34 unit tests with mock parser/extractor
+[Unreleased]: https://github.com/thesunnysinha/docpipe/compare/v0.1.0...HEAD
+[0.1.0]: https://github.com/thesunnysinha/docpipe/releases/tag/v0.1.0

docpipe_sdk-0.1.0/Dockerfile ADDED Viewed

@@ -0,0 +1,19 @@
+# Container image for docpipe: runs the API server by default and can also be
+# used to invoke any docpipe CLI command.
+FROM python:3.12-slim
+WORKDIR /app
+# Install system dependencies for document processing
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgl1 \
+    libglib2.0-0 \
+    && rm -rf /var/lib/apt/lists/*
+# README.md and LICENSE are referenced by pyproject.toml (readme, license-files),
+# so they are copied alongside it for the package build.
+COPY pyproject.toml README.md LICENSE ./
+COPY src/ ./src/
+# NOTE(review): the "all" extra already includes "server" in pyproject.toml, so
+# listing both is redundant but harmless.
+RUN pip install --no-cache-dir ".[all,server]"
+# Arguments passed after the image name replace CMD and go to the docpipe CLI;
+# with no arguments the container starts the server on port 8000.
+ENTRYPOINT ["docpipe"]
+CMD ["serve", "--host", "0.0.0.0", "--port", "8000"]
+EXPOSE 8000

docpipe_sdk-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Sunny Sinha
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

docpipe_sdk-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,170 @@
+Metadata-Version: 2.4
+Name: docpipe-sdk
+Version: 0.1.0
+Summary: Unified document parsing, structured extraction, and vector ingestion pipeline
+Project-URL: Homepage, https://docpipe.vercel.app
+Project-URL: Repository, https://github.com/thesunnysinha/docpipe
+Project-URL: Bug Tracker, https://github.com/thesunnysinha/docpipe/issues
+Project-URL: Changelog, https://github.com/thesunnysinha/docpipe/blob/main/CHANGELOG.md
+Author-email: Sunny Sinha <thesunnysinha@gmail.com>
+License-Expression: MIT
+License-File: LICENSE
+Keywords: docling,document,extraction,ingestion,langchain,langextract,llm,parsing,pipeline,rag,vector
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Text Processing
+Classifier: Typing :: Typed
+Requires-Python: >=3.10
+Requires-Dist: click>=8.0
+Requires-Dist: langchain-core>=0.3
+Requires-Dist: langchain-text-splitters>=0.3
+Requires-Dist: pydantic-settings>=2.0
+Requires-Dist: pydantic>=2.0
+Requires-Dist: pyyaml>=6.0
+Provides-Extra: all
+Requires-Dist: docling>=2.0; extra == 'all'
+Requires-Dist: fastapi>=0.100; extra == 'all'
+Requires-Dist: langchain-google-genai>=2.0; extra == 'all'
+Requires-Dist: langchain-ollama>=0.3; extra == 'all'
+Requires-Dist: langchain-openai>=0.3; extra == 'all'
+Requires-Dist: langchain-postgres>=0.0.12; extra == 'all'
+Requires-Dist: langextract>=0.1; extra == 'all'
+Requires-Dist: python-multipart>=0.0.6; extra == 'all'
+Requires-Dist: uvicorn[standard]>=0.20; extra == 'all'
+Provides-Extra: dev
+Requires-Dist: httpx; extra == 'dev'
+Requires-Dist: mypy; extra == 'dev'
+Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
+Requires-Dist: pytest-cov; extra == 'dev'
+Requires-Dist: pytest>=7.0; extra == 'dev'
+Requires-Dist: ruff; extra == 'dev'
+Provides-Extra: docling
+Requires-Dist: docling>=2.0; extra == 'docling'
+Provides-Extra: google
+Requires-Dist: langchain-google-genai>=2.0; extra == 'google'
+Provides-Extra: huggingface
+Requires-Dist: langchain-huggingface>=0.1; extra == 'huggingface'
+Provides-Extra: langextract
+Requires-Dist: langextract>=0.1; extra == 'langextract'
+Provides-Extra: ollama
+Requires-Dist: langchain-ollama>=0.3; extra == 'ollama'
+Provides-Extra: openai
+Requires-Dist: langchain-openai>=0.3; extra == 'openai'
+Provides-Extra: pgvector
+Requires-Dist: langchain-postgres>=0.0.12; extra == 'pgvector'
+Provides-Extra: server
+Requires-Dist: fastapi>=0.100; extra == 'server'
+Requires-Dist: python-multipart>=0.0.6; extra == 'server'
+Requires-Dist: uvicorn[standard]>=0.20; extra == 'server'
+Description-Content-Type: text/markdown
+# docpipe
+Unified document parsing, structured extraction, and vector ingestion pipeline.
+## Overview
+docpipe connects document parsing (Docling), LLM-based structured extraction (LangExtract + LangChain), and vector ingestion (pgvector via LangChain) into a single composable pipeline.
+**Three independent pipelines, composable together:**
+1. **Parse**: Unstructured docs → parsed text/markdown (Docling)
+2. **Extract**: Text → structured entities via LLM (LangExtract or LangChain)
+3. **Ingest**: Parsed chunks → embeddings → your vector DB (LangChain + pgvector)
+## Install
+```bash
+# Core only
+pip install docpipe-sdk
+# With all backends
+pip install "docpipe-sdk[all]"
+# Pick what you need
+pip install "docpipe-sdk[docling]"          # Document parsing
+pip install "docpipe-sdk[langextract]"      # Google LangExtract
+pip install "docpipe-sdk[openai]"           # OpenAI embeddings + LLM
+pip install "docpipe-sdk[pgvector]"         # PostgreSQL vector store
+pip install "docpipe-sdk[server]"           # FastAPI server
+```
+## Quick Start
+### Python API
+```python
+import docpipe
+# Parse a document
+doc = docpipe.parse("invoice.pdf")
+print(doc.markdown)
+# Extract structured data
+schema = docpipe.ExtractionSchema(
+    description="Extract invoice line items with amounts",
+    model_id="gemini-2.5-flash",
+)
+results = docpipe.extract(doc.text, schema)
+# Full pipeline
+result = docpipe.run("invoice.pdf", schema)
+# Ingest into your vector DB
+config = docpipe.IngestionConfig(
+    connection_string="postgresql://user:pass@localhost:5432/mydb",
+    table_name="invoices",
+    embedding_provider="openai",
+    embedding_model="text-embedding-3-small",
+)
+docpipe.ingest("invoice.pdf", config=config)
+```
+### CLI
+```bash
+docpipe parse invoice.pdf --format markdown
+docpipe extract "John Doe, age 30" --schema schema.yaml --model gemini-2.5-flash
+docpipe run invoice.pdf --schema schema.yaml --model gemini-2.5-flash
+docpipe ingest invoice.pdf --db "postgresql://..." --table invoices \
+    --embedding-provider openai --embedding-model text-embedding-3-small
+docpipe search "total amount" --db "postgresql://..." --table invoices \
+    --embedding-provider openai --embedding-model text-embedding-3-small
+docpipe serve
+docpipe plugins list
+```
+### Docker
+```bash
+# API server
+docker run -p 8000:8000 --env-file .env docpipe
+# CLI
+docker run -v ./data:/data docpipe parse /data/invoice.pdf
+```
+## Plugin System
+Third-party packages can register as plugins via entry points:
+```toml
+# In your package's pyproject.toml
+[project.entry-points."docpipe.parsers"]
+my_parser = "my_package:MyParser"
+[project.entry-points."docpipe.extractors"]
+my_extractor = "my_package:MyExtractor"
+```
+## License
+MIT

docpipe_sdk-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,102 @@
+# docpipe
+Unified document parsing, structured extraction, and vector ingestion pipeline.
+## Overview
+docpipe connects document parsing (Docling), LLM-based structured extraction (LangExtract + LangChain), and vector ingestion (pgvector via LangChain) into a single composable pipeline.
+**Three independent pipelines, composable together:**
+1. **Parse**: Unstructured docs → parsed text/markdown (Docling)
+2. **Extract**: Text → structured entities via LLM (LangExtract or LangChain)
+3. **Ingest**: Parsed chunks → embeddings → your vector DB (LangChain + pgvector)
+## Install
+```bash
+# Core only
+pip install docpipe-sdk
+# With all backends
+pip install "docpipe-sdk[all]"
+# Pick what you need
+pip install "docpipe-sdk[docling]"          # Document parsing
+pip install "docpipe-sdk[langextract]"      # Google LangExtract
+pip install "docpipe-sdk[openai]"           # OpenAI embeddings + LLM
+pip install "docpipe-sdk[pgvector]"         # PostgreSQL vector store
+pip install "docpipe-sdk[server]"           # FastAPI server
+```
+## Quick Start
+### Python API
+```python
+import docpipe
+# Parse a document
+doc = docpipe.parse("invoice.pdf")
+print(doc.markdown)
+# Extract structured data
+schema = docpipe.ExtractionSchema(
+    description="Extract invoice line items with amounts",
+    model_id="gemini-2.5-flash",
+)
+results = docpipe.extract(doc.text, schema)
+# Full pipeline
+result = docpipe.run("invoice.pdf", schema)
+# Ingest into your vector DB
+config = docpipe.IngestionConfig(
+    connection_string="postgresql://user:pass@localhost:5432/mydb",
+    table_name="invoices",
+    embedding_provider="openai",
+    embedding_model="text-embedding-3-small",
+)
+docpipe.ingest("invoice.pdf", config=config)
+```
+### CLI
+```bash
+docpipe parse invoice.pdf --format markdown
+docpipe extract "John Doe, age 30" --schema schema.yaml --model gemini-2.5-flash
+docpipe run invoice.pdf --schema schema.yaml --model gemini-2.5-flash
+docpipe ingest invoice.pdf --db "postgresql://..." --table invoices \
+    --embedding-provider openai --embedding-model text-embedding-3-small
+docpipe search "total amount" --db "postgresql://..." --table invoices \
+    --embedding-provider openai --embedding-model text-embedding-3-small
+docpipe serve
+docpipe plugins list
+```
+### Docker
+```bash
+# API server
+docker run -p 8000:8000 --env-file .env docpipe
+# CLI
+docker run -v ./data:/data docpipe parse /data/invoice.pdf
+```
+## Plugin System
+Third-party packages can register as plugins via entry points:
+```toml
+# In your package's pyproject.toml
+[project.entry-points."docpipe.parsers"]
+my_parser = "my_package:MyParser"
+[project.entry-points."docpipe.extractors"]
+my_extractor = "my_package:MyExtractor"
+```
+## License
+MIT

docpipe_sdk-0.1.0/docpipe.example.yaml ADDED Viewed

@@ -0,0 +1,29 @@
+# docpipe configuration
+# Copy to docpipe.yaml and customize
+# Parser settings
+default_parser: docling
+parser_options: {}
+# Extractor settings
+default_extractor: langextract
+extractor_options: {}
+# Ingestion settings (provide your own DB connection)
+# db_connection_string: postgresql://user:pass@host:5432/dbname
+# db_table_name: docpipe_documents
+# embedding_provider: openai
+# embedding_model: text-embedding-3-small
+# chunk_size: 1000
+# chunk_overlap: 200
+# ingest_mode: both
+# Server settings
+server_host: "0.0.0.0"
+server_port: 8000
+# Pipeline settings
+max_concurrency: 4
+# Logging
+log_level: INFO

docpipe_sdk-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,100 @@
+# Packaging metadata and tool configuration. The distribution is published as
+# "docpipe-sdk"; the importable package and the CLI command are both "docpipe".
+[build-system]
+requires = ["hatchling>=1.26"]
+build-backend = "hatchling.build"
+[project]
+name = "docpipe-sdk"
+# Keep in sync with src/docpipe/_version.py; scripts/release.sh rewrites both.
+version = "0.1.0"
+description = "Unified document parsing, structured extraction, and vector ingestion pipeline"
+readme = "README.md"
+license = "MIT"
+license-files = ["LICENSE"]
+requires-python = ">=3.10"
+authors = [{ name = "Sunny Sinha", email = "thesunnysinha@gmail.com" }]
+keywords = ["document", "parsing", "extraction", "llm", "pipeline", "vector", "ingestion", "rag", "docling", "langextract", "langchain"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Text Processing",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+    "Typing :: Typed",
+]
+# Core runtime requirements; parser, extractor, embedding and server backends
+# are opt-in extras below.
+dependencies = [
+    "pydantic>=2.0",
+    "pydantic-settings>=2.0",
+    "pyyaml>=6.0",
+    "click>=8.0",
+    "langchain-core>=0.3",
+    "langchain-text-splitters>=0.3",
+]
+[project.optional-dependencies]
+docling = ["docling>=2.0"]
+langextract = ["langextract>=0.1"]
+openai = ["langchain-openai>=0.3"]
+google = ["langchain-google-genai>=2.0"]
+ollama = ["langchain-ollama>=0.3"]
+huggingface = ["langchain-huggingface>=0.1"]
+pgvector = ["langchain-postgres>=0.0.12"]
+server = ["fastapi>=0.100", "uvicorn[standard]>=0.20", "python-multipart>=0.0.6"]
+# Self-referencing extra that bundles the extras listed here; note that
+# "huggingface" and "dev" are not part of it.
+all = ["docpipe-sdk[docling,langextract,openai,google,ollama,pgvector,server]"]
+dev = [
+    "pytest>=7.0",
+    "pytest-asyncio>=0.21",
+    "pytest-cov",
+    "ruff",
+    "mypy",
+    "httpx",
+]
+# Console command "docpipe" mapped to the cli object in docpipe.cli.main.
+[project.scripts]
+docpipe = "docpipe.cli.main:cli"
+# Built-in plugins, registered under the same entry-point groups that the
+# README documents for third-party parsers and extractors.
+[project.entry-points."docpipe.parsers"]
+docling = "docpipe.parsers.docling_parser:DoclingParser"
+[project.entry-points."docpipe.extractors"]
+langextract = "docpipe.extractors.langextract_extractor:LangExtractExtractor"
+langchain = "docpipe.extractors.langchain_extractor:LangChainExtractor"
+[project.urls]
+Homepage = "https://docpipe.vercel.app"
+Repository = "https://github.com/thesunnysinha/docpipe"
+"Bug Tracker" = "https://github.com/thesunnysinha/docpipe/issues"
+Changelog = "https://github.com/thesunnysinha/docpipe/blob/main/CHANGELOG.md"
+# src layout: the wheel ships the package found at src/docpipe.
+[tool.hatch.build.targets.wheel]
+packages = ["src/docpipe"]
+[tool.ruff]
+target-version = "py310"
+line-length = 100
+src = ["src"]
+[tool.ruff.lint]
+select = ["E", "F", "I", "UP", "B", "SIM"]
+[tool.mypy]
+python_version = "3.10"
+strict = true
+warn_return_any = true
+warn_unused_configs = true
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+asyncio_mode = "auto"
+# Custom markers for tests that need an optional backend or external service.
+markers = [
+    "requires_docling: needs docling installed",
+    "requires_langextract: needs langextract installed",
+    "requires_langchain: needs langchain provider installed",
+    "requires_pgvector: needs pgvector DB available",
+    "requires_api_key: needs LLM API key configured",
+]

docpipe_sdk-0.1.0/scripts/release.sh ADDED Viewed

@@ -0,0 +1,32 @@
+#!/bin/bash
+# Release script for docpipe
+# Usage: ./scripts/release.sh 0.2.0
+set -euo pipefail
+VERSION="${1:?Usage: $0 <version>}"
+echo "Releasing docpipe v${VERSION}..."
+# Update version in source
+sed -i.bak "s/__version__ = \".*\"/__version__ = \"${VERSION}\"/" src/docpipe/_version.py
+rm -f src/docpipe/_version.py.bak
+# Update version in pyproject.toml
+sed -i.bak "s/^version = \".*\"/version = \"${VERSION}\"/" pyproject.toml
+rm -f pyproject.toml.bak
+# Stage changes
+git add src/docpipe/_version.py pyproject.toml
+# Commit
+git commit -m "release: v${VERSION}"
+# Tag
+git tag -a "v${VERSION}" -m "Release v${VERSION}"
+echo ""
+echo "Done! To publish:"
+echo "  git push origin main --tags"
+echo ""
+echo "GitHub Actions will automatically publish to PyPI."