dynamanic-raggedy 0.1.0a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dynamanic_raggedy-0.1.0a0/.env.example +22 -0
- dynamanic_raggedy-0.1.0a0/.github/workflows/ci.yml +119 -0
- dynamanic_raggedy-0.1.0a0/.github/workflows/docs.yml +43 -0
- dynamanic_raggedy-0.1.0a0/.gitignore +23 -0
- dynamanic_raggedy-0.1.0a0/.python-version +1 -0
- dynamanic_raggedy-0.1.0a0/CHANGELOG.md +65 -0
- dynamanic_raggedy-0.1.0a0/LICENSE +21 -0
- dynamanic_raggedy-0.1.0a0/PKG-INFO +163 -0
- dynamanic_raggedy-0.1.0a0/README.md +84 -0
- dynamanic_raggedy-0.1.0a0/docs/adapters.md +61 -0
- dynamanic_raggedy-0.1.0a0/docs/api-reference.md +166 -0
- dynamanic_raggedy-0.1.0a0/docs/concepts.md +93 -0
- dynamanic_raggedy-0.1.0a0/docs/configuration.md +127 -0
- dynamanic_raggedy-0.1.0a0/docs/index.md +40 -0
- dynamanic_raggedy-0.1.0a0/docs/intake.md +76 -0
- dynamanic_raggedy-0.1.0a0/docs/quickstart.md +93 -0
- dynamanic_raggedy-0.1.0a0/examples/01_hello_world.py +106 -0
- dynamanic_raggedy-0.1.0a0/examples/02_pgvector_anthropic.py +68 -0
- dynamanic_raggedy-0.1.0a0/examples/03_local_ollama.py +59 -0
- dynamanic_raggedy-0.1.0a0/examples/04_custom_chunker.py +110 -0
- dynamanic_raggedy-0.1.0a0/examples/05_intake_directory.py +67 -0
- dynamanic_raggedy-0.1.0a0/examples/06_lm_studio_gpu.py +56 -0
- dynamanic_raggedy-0.1.0a0/examples/examples_helpers.py +40 -0
- dynamanic_raggedy-0.1.0a0/mkdocs.yml +54 -0
- dynamanic_raggedy-0.1.0a0/pyproject.toml +178 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/__init__.py +70 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/_version.py +1 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/chunking/__init__.py +6 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/chunking/fixed.py +81 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/chunking/paragraph.py +70 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/chunking/semantic.py +149 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/chunking/sentence.py +101 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/config.py +117 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/cost/__init__.py +7 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/cost/pricing.py +36 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/cost/sqlite.py +159 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/cost/tracker.py +75 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/embeddings/__init__.py +3 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/embeddings/deterministic.py +55 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/embeddings/local.py +103 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/embeddings/ollama.py +82 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/embeddings/openai.py +65 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/errors.py +34 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/__init__.py +37 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/extractors/__init__.py +10 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/extractors/docx.py +44 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/extractors/html.py +64 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/extractors/pdf.py +44 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/extractors/plaintext.py +33 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/extractors/registry.py +103 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/pipeline.py +58 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/protocols.py +28 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/sources/__init__.py +5 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/sources/files.py +100 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/sources/memory.py +19 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/sources/s3.py +83 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/sources/url.py +71 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/intake/types.py +23 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/llm/__init__.py +3 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/llm/anthropic.py +90 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/llm/ollama.py +99 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/llm/openai.py +112 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/llm/registry.py +87 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/obs/__init__.py +3 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/obs/default.py +33 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/pii/__init__.py +4 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/pii/noop.py +22 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/pii/patterns.py +107 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/pii/redactor.py +99 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/pipeline.py +816 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/prompts/__init__.py +3 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/prompts/templates.py +45 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/__init__.py +21 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/chunker.py +14 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/cost.py +18 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/embedding.py +18 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/events.py +16 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/llm.py +28 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/pii.py +14 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/recorder.py +41 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/store.py +30 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/recording/__init__.py +6 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/recording/memory.py +101 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/recording/sqlite.py +317 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/stores/__init__.py +9 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/stores/chroma.py +349 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/stores/memory.py +93 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/stores/pgvector.py +330 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/stores/qdrant.py +338 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/stores/sqlite_vec.py +331 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/stores/weaviate.py +352 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/types.py +189 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/util/__init__.py +5 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/util/ids.py +13 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/util/sync.py +14 -0
- dynamanic_raggedy-0.1.0a0/src/raggedy/util/tokens.py +15 -0
- dynamanic_raggedy-0.1.0a0/tests/conftest.py +138 -0
- dynamanic_raggedy-0.1.0a0/tests/intake/__init__.py +0 -0
- dynamanic_raggedy-0.1.0a0/tests/intake/test_extractors.py +50 -0
- dynamanic_raggedy-0.1.0a0/tests/intake/test_ingest_from.py +64 -0
- dynamanic_raggedy-0.1.0a0/tests/intake/test_sources.py +101 -0
- dynamanic_raggedy-0.1.0a0/tests/intake/test_url_source.py +76 -0
- dynamanic_raggedy-0.1.0a0/tests/stores/__init__.py +0 -0
- dynamanic_raggedy-0.1.0a0/tests/stores/test_chroma.py +156 -0
- dynamanic_raggedy-0.1.0a0/tests/stores/test_memory.py +133 -0
- dynamanic_raggedy-0.1.0a0/tests/stores/test_pgvector.py +140 -0
- dynamanic_raggedy-0.1.0a0/tests/stores/test_qdrant.py +170 -0
- dynamanic_raggedy-0.1.0a0/tests/stores/test_sqlite_vec.py +161 -0
- dynamanic_raggedy-0.1.0a0/tests/stores/test_weaviate.py +142 -0
- dynamanic_raggedy-0.1.0a0/tests/test_chunkers.py +102 -0
- dynamanic_raggedy-0.1.0a0/tests/test_chunking.py +43 -0
- dynamanic_raggedy-0.1.0a0/tests/test_config_yaml.py +71 -0
- dynamanic_raggedy-0.1.0a0/tests/test_pii.py +48 -0
- dynamanic_raggedy-0.1.0a0/tests/test_pipeline.py +105 -0
- dynamanic_raggedy-0.1.0a0/tests/test_pipeline_errors.py +80 -0
- dynamanic_raggedy-0.1.0a0/tests/test_sqlite_cost.py +101 -0
- dynamanic_raggedy-0.1.0a0/tests/test_sqlite_recorder.py +134 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Copy to .env and uncomment any keys you want Raggedy to pick up.
|
|
2
|
+
# `Raggedy()` calls python-dotenv at construction time and populates
|
|
3
|
+
# os.environ (without overriding existing vars), so provider SDKs
|
|
4
|
+
# (anthropic, openai) read these directly.
|
|
5
|
+
|
|
6
|
+
# --- LLM providers ----------------------------------------------------
|
|
7
|
+
# ANTHROPIC_API_KEY=
|
|
8
|
+
# OPENAI_API_KEY=
|
|
9
|
+
# OPENAI_ORG=
|
|
10
|
+
# OLLAMA_HOST=http://localhost:11434
|
|
11
|
+
# LMSTUDIO_BASE_URL=http://localhost:1234/v1
|
|
12
|
+
# LMSTUDIO_API_KEY=lm-studio
|
|
13
|
+
|
|
14
|
+
# --- Raggedy adapter selection (any RAGGEDY_* setting in config.py) ---
|
|
15
|
+
# RAGGEDY_LLM=anthropic
|
|
16
|
+
# RAGGEDY_LLM_MODEL=claude-haiku-4-5-20251001
|
|
17
|
+
# RAGGEDY_EMBEDDING=sentence-transformers
|
|
18
|
+
# RAGGEDY_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
|
|
19
|
+
# RAGGEDY_DEVICE=auto # auto | cuda | mps | cpu
|
|
20
|
+
# RAGGEDY_STORE=memory
|
|
21
|
+
# RAGGEDY_NAMESPACE=default
|
|
22
|
+
# RAGGEDY_COST_DAILY_LIMIT_USD=1.00
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
concurrency:
|
|
10
|
+
group: ci-${{ github.ref }}
|
|
11
|
+
cancel-in-progress: true
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
lint-and-type:
|
|
15
|
+
name: Lint + types (py3.12)
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
- uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.12"
|
|
22
|
+
cache: pip
|
|
23
|
+
- name: Install
|
|
24
|
+
run: |
|
|
25
|
+
python -m pip install --upgrade pip
|
|
26
|
+
pip install -e ".[dev]"
|
|
27
|
+
- name: Ruff
|
|
28
|
+
run: python -m ruff check src tests examples
|
|
29
|
+
- name: Mypy
|
|
30
|
+
run: python -m mypy src
|
|
31
|
+
|
|
32
|
+
test:
|
|
33
|
+
name: pytest (py${{ matrix.python }})
|
|
34
|
+
runs-on: ubuntu-latest
|
|
35
|
+
strategy:
|
|
36
|
+
fail-fast: false
|
|
37
|
+
matrix:
|
|
38
|
+
python: ["3.11", "3.12", "3.13"]
|
|
39
|
+
steps:
|
|
40
|
+
- uses: actions/checkout@v4
|
|
41
|
+
- uses: actions/setup-python@v5
|
|
42
|
+
with:
|
|
43
|
+
python-version: ${{ matrix.python }}
|
|
44
|
+
cache: pip
|
|
45
|
+
- name: Install base + dev
|
|
46
|
+
run: |
|
|
47
|
+
python -m pip install --upgrade pip
|
|
48
|
+
pip install -e ".[dev]"
|
|
49
|
+
- name: Run tests (no coverage gate — adapter modules require extras)
|
|
50
|
+
run: python -m pytest -q --no-header
|
|
51
|
+
|
|
52
|
+
test-with-extras:
|
|
53
|
+
name: pytest + coverage (sqlite-vec + chroma + qdrant)
|
|
54
|
+
runs-on: ubuntu-latest
|
|
55
|
+
steps:
|
|
56
|
+
- uses: actions/checkout@v4
|
|
57
|
+
- uses: actions/setup-python@v5
|
|
58
|
+
with:
|
|
59
|
+
python-version: "3.12"
|
|
60
|
+
cache: pip
|
|
61
|
+
- name: Install extras
|
|
62
|
+
run: |
|
|
63
|
+
python -m pip install --upgrade pip
|
|
64
|
+
pip install -e ".[dev,sqlitevec,chroma,qdrant]"
|
|
65
|
+
- name: Run tests with coverage gate
|
|
66
|
+
run: |
|
|
67
|
+
python -m pytest -q \
|
|
68
|
+
--cov=raggedy \
|
|
69
|
+
--cov-report=term-missing \
|
|
70
|
+
--cov-report=xml
|
|
71
|
+
- name: Upload coverage artifact
|
|
72
|
+
uses: actions/upload-artifact@v4
|
|
73
|
+
with:
|
|
74
|
+
name: coverage-xml
|
|
75
|
+
path: coverage.xml
|
|
76
|
+
|
|
77
|
+
pgvector-integration:
|
|
78
|
+
name: pgvector integration
|
|
79
|
+
runs-on: ubuntu-latest
|
|
80
|
+
services:
|
|
81
|
+
postgres:
|
|
82
|
+
image: ankane/pgvector
|
|
83
|
+
env:
|
|
84
|
+
POSTGRES_PASSWORD: pg
|
|
85
|
+
ports:
|
|
86
|
+
- 5432:5432
|
|
87
|
+
options: >-
|
|
88
|
+
--health-cmd="pg_isready -U postgres"
|
|
89
|
+
--health-interval=5s --health-timeout=5s --health-retries=10
|
|
90
|
+
env:
|
|
91
|
+
RAGGEDY_TEST_PG_URL: postgresql://postgres:pg@localhost:5432/postgres
|
|
92
|
+
steps:
|
|
93
|
+
- uses: actions/checkout@v4
|
|
94
|
+
- uses: actions/setup-python@v5
|
|
95
|
+
with:
|
|
96
|
+
python-version: "3.12"
|
|
97
|
+
cache: pip
|
|
98
|
+
- name: Install
|
|
99
|
+
run: |
|
|
100
|
+
python -m pip install --upgrade pip
|
|
101
|
+
pip install -e ".[dev,pgvector]"
|
|
102
|
+
- name: pgvector tests
|
|
103
|
+
run: python -m pytest -q tests/stores/test_pgvector.py
|
|
104
|
+
|
|
105
|
+
docs-build:
|
|
106
|
+
name: docs build
|
|
107
|
+
runs-on: ubuntu-latest
|
|
108
|
+
steps:
|
|
109
|
+
- uses: actions/checkout@v4
|
|
110
|
+
- uses: actions/setup-python@v5
|
|
111
|
+
with:
|
|
112
|
+
python-version: "3.12"
|
|
113
|
+
cache: pip
|
|
114
|
+
- name: Install
|
|
115
|
+
run: |
|
|
116
|
+
python -m pip install --upgrade pip
|
|
117
|
+
pip install -e ".[docs]"
|
|
118
|
+
- name: mkdocs build (strict)
|
|
119
|
+
run: python -m mkdocs build --strict
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
name: Docs
|
|
2
|
+
|
|
3
|
+
# Pages must be enabled in repo Settings → Pages (source: GitHub Actions)
|
|
4
|
+
# before this workflow can deploy. Run manually via `gh workflow run docs.yml`
|
|
5
|
+
# once Pages is set up; auto-deploy on push is intentionally disabled to
|
|
6
|
+
# avoid red CI on repos where Pages isn't configured yet.
|
|
7
|
+
on:
|
|
8
|
+
workflow_dispatch:
|
|
9
|
+
|
|
10
|
+
permissions:
|
|
11
|
+
contents: read
|
|
12
|
+
pages: write
|
|
13
|
+
id-token: write
|
|
14
|
+
|
|
15
|
+
concurrency:
|
|
16
|
+
group: docs-${{ github.ref }}
|
|
17
|
+
cancel-in-progress: true
|
|
18
|
+
|
|
19
|
+
jobs:
|
|
20
|
+
deploy:
|
|
21
|
+
name: build & deploy
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
environment:
|
|
24
|
+
name: github-pages
|
|
25
|
+
url: ${{ steps.deployment.outputs.page_url }}
|
|
26
|
+
steps:
|
|
27
|
+
- uses: actions/checkout@v4
|
|
28
|
+
- uses: actions/setup-python@v5
|
|
29
|
+
with:
|
|
30
|
+
python-version: "3.12"
|
|
31
|
+
cache: pip
|
|
32
|
+
- name: Install
|
|
33
|
+
run: |
|
|
34
|
+
python -m pip install --upgrade pip
|
|
35
|
+
pip install -e ".[docs]"
|
|
36
|
+
- name: Build site
|
|
37
|
+
run: python -m mkdocs build --strict --site-dir _site
|
|
38
|
+
- uses: actions/configure-pages@v5
|
|
39
|
+
- uses: actions/upload-pages-artifact@v3
|
|
40
|
+
with:
|
|
41
|
+
path: _site
|
|
42
|
+
- id: deployment
|
|
43
|
+
uses: actions/deploy-pages@v4
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.py[cod]
|
|
3
|
+
*.egg-info/
|
|
4
|
+
*.egg
|
|
5
|
+
.venv/
|
|
6
|
+
.env
|
|
7
|
+
.envrc
|
|
8
|
+
build/
|
|
9
|
+
dist/
|
|
10
|
+
.pytest_cache/
|
|
11
|
+
.mypy_cache/
|
|
12
|
+
.ruff_cache/
|
|
13
|
+
.coverage
|
|
14
|
+
coverage.xml
|
|
15
|
+
htmlcov/
|
|
16
|
+
*.sqlite
|
|
17
|
+
*.db
|
|
18
|
+
.DS_Store
|
|
19
|
+
.idea/
|
|
20
|
+
.vscode/
|
|
21
|
+
|
|
22
|
+
# mkdocs build output
|
|
23
|
+
site/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.11
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to Raggedy are documented here. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
4
|
+
|
|
5
|
+
## [Unreleased]
|
|
6
|
+
|
|
7
|
+
## [0.1.0a0] — 2026-05-19
|
|
8
|
+
|
|
9
|
+
Initial alpha release.
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
#### Core pipeline
|
|
14
|
+
- `Raggedy` facade and `RAGPipeline` async orchestrator (14-step flow: validate → start run → cost-gate → PII-redact → embed → retrieve → re-redact → pack → render prompt → LLM → record usage → finish run → return).
|
|
15
|
+
- Eight Protocols for full adapter swappability: `VectorStore`, `EmbeddingBackend`, `LLMProvider`, `Chunker`, `PIIRedactor`, `CostTracker`, `RunRecorder`, `Logger`/`EventSink`.
|
|
16
|
+
- Domain-neutral isolation: single `namespace: str` plus free-form `tags: dict[str, str]`. No tenant / matter / legal-specific concepts.
|
|
17
|
+
- `RaggedyConfig` (pydantic-settings) with `RAGGEDY_*` env prefix, `.env` auto-load via python-dotenv, and `from_yaml(path)` loader.
|
|
18
|
+
|
|
19
|
+
#### Vector stores
|
|
20
|
+
- `InMemoryStore` — numpy cosine, namespace + tag filter.
|
|
21
|
+
- `SqliteVecStore` (`[sqlitevec]`) — `sqlite-vec` extension, `distance_metric=cosine`, in-memory and file-backed.
|
|
22
|
+
- `PgVectorStore` (`[pgvector]`) — asyncpg + pgvector; JSONB tags with GIN index.
|
|
23
|
+
- `QdrantStore` (`[qdrant]`) — UUIDv5 from chunk id; supports embedded `:memory:` and HTTP/HTTPS endpoints.
|
|
24
|
+
- `ChromaStore` (`[chroma]`) — EphemeralClient / PersistentClient / HttpClient; primitive metadata flattening.
|
|
25
|
+
- `WeaviateStore` (`[weaviate]`) — v4 async client; PascalCase collection.
|
|
26
|
+
|
|
27
|
+
#### LLM providers
|
|
28
|
+
- `AnthropicProvider` (`[anthropic]`) — Claude 4.x family.
|
|
29
|
+
- `OpenAIProvider` (`[openai]`) — tiktoken-aware `count_tokens` when installed.
|
|
30
|
+
- `OllamaProvider` — uses base httpx; local zero-cost.
|
|
31
|
+
- LM Studio dispatched through `OpenAIProvider` against `LMSTUDIO_BASE_URL`.
|
|
32
|
+
|
|
33
|
+
#### Embedding backends
|
|
34
|
+
- `DeterministicEmbedder` — hashed bag-of-words (real algorithm, useful for tests).
|
|
35
|
+
- `SentenceTransformersEmbedder` (`[local]`) — GPU-aware via `detect_device()` (CUDA → MPS → CPU); honours `embedding_batch_size`.
|
|
36
|
+
- `OpenAIEmbedder` (`[openai]`).
|
|
37
|
+
- `OllamaEmbedder` — uses base httpx; batch `/api/embed` with single-shot fallback.
|
|
38
|
+
|
|
39
|
+
#### Chunkers
|
|
40
|
+
- `SemanticChunker` — paragraph + sentence boundaries, greedy merge, leading overlap, char-offset tracking.
|
|
41
|
+
- `ParagraphChunker` — one chunk per paragraph.
|
|
42
|
+
- `SentenceChunker` — sentence-level greedy merge.
|
|
43
|
+
- `FixedChunker` — fixed-window with word-boundary snap.
|
|
44
|
+
|
|
45
|
+
#### Intake layer
|
|
46
|
+
- Sources: `MemorySource`, `FileSource`, `DirectorySource`, `UrlSource` (base), `S3Source` (`[s3]`).
|
|
47
|
+
- Extractors: `PlainTextExtractor`, `HtmlExtractor` (stdlib only), `PdfExtractor` (`[pdf]`), `DocxExtractor` (`[docx]`).
|
|
48
|
+
- `IntakeRunner` orchestrates source → extractor → `Document`. `ExtractorRegistry` dispatches by `content_type`.
|
|
49
|
+
- `Raggedy.ingest_from(source)` one-liner with batching.
|
|
50
|
+
|
|
51
|
+
#### Batteries
|
|
52
|
+
- `RegexPIIRedactor` — 10 PII types (SSN, email, phone, DOB, address, credit-card with Luhn, IP, passport, driver's license, bank account). `RedactionResult.redaction_map` is in-memory only; never persisted.
|
|
53
|
+
- `InMemoryCostTracker` and `SqliteCostTracker` (atomic daily upsert via `INSERT … ON CONFLICT DO UPDATE`).
|
|
54
|
+
- `InMemoryRunRecorder` (bounded deque) and `SqliteRunRecorder` (FK-cascade artifacts, durable).
|
|
55
|
+
- Versioned RAG prompt templates pinned in every run record (`prompt_template_version`).
|
|
56
|
+
|
|
57
|
+
#### Tests, examples, docs
|
|
58
|
+
- 73+ tests; the full suite runs offline, and pgvector / Weaviate / live-LLM tests are skipped when their backing service is unavailable.
|
|
59
|
+
- 6 worked examples (hello world, pgvector + Anthropic, local Ollama, custom chunker, intake directory, LM Studio GPU).
|
|
60
|
+
- mkdocs-material site (`docs/`).
|
|
61
|
+
|
|
62
|
+
### Design constraints
|
|
63
|
+
- No mock LLM providers ship with the library; tests use real providers with skip-if-unavailable.
|
|
64
|
+
- Async-first; sync shims via `asgiref.async_to_sync`.
|
|
65
|
+
- All external calls are wrapped so that failures surface as `ProviderError` / `ConfigError`. The run record is always finalised with the appropriate status, whichever code path is taken.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Dynamanic LLC
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dynamanic-raggedy
|
|
3
|
+
Version: 0.1.0a0
|
|
4
|
+
Summary: Batteries-included, importable RAG core.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Dynamanic/raggedy
|
|
6
|
+
Project-URL: Repository, https://github.com/Dynamanic/raggedy
|
|
7
|
+
Author-email: Mike Jackson <dynamanicllc@gmail.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: embeddings,llm,rag,retrieval,vector-search
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Requires-Dist: asgiref>=3.8
|
|
20
|
+
Requires-Dist: httpx>=0.27
|
|
21
|
+
Requires-Dist: numpy>=1.26
|
|
22
|
+
Requires-Dist: pydantic-settings>=2.3
|
|
23
|
+
Requires-Dist: pydantic>=2.7
|
|
24
|
+
Requires-Dist: python-dotenv>=1.0
|
|
25
|
+
Requires-Dist: python-ulid>=2.7
|
|
26
|
+
Requires-Dist: pyyaml>=6.0
|
|
27
|
+
Provides-Extra: all
|
|
28
|
+
Requires-Dist: aiosqlite>=0.20; extra == 'all'
|
|
29
|
+
Requires-Dist: anthropic>=0.34; extra == 'all'
|
|
30
|
+
Requires-Dist: asyncpg>=0.29; extra == 'all'
|
|
31
|
+
Requires-Dist: boto3>=1.34; extra == 'all'
|
|
32
|
+
Requires-Dist: chromadb>=0.5; extra == 'all'
|
|
33
|
+
Requires-Dist: openai>=1.40; extra == 'all'
|
|
34
|
+
Requires-Dist: pgvector>=0.3; extra == 'all'
|
|
35
|
+
Requires-Dist: pypdf>=4.0; extra == 'all'
|
|
36
|
+
Requires-Dist: python-docx>=1.1; extra == 'all'
|
|
37
|
+
Requires-Dist: qdrant-client>=1.10; extra == 'all'
|
|
38
|
+
Requires-Dist: sentence-transformers>=3.0; extra == 'all'
|
|
39
|
+
Requires-Dist: sqlite-vec>=0.1.3; extra == 'all'
|
|
40
|
+
Requires-Dist: torch>=2.2; extra == 'all'
|
|
41
|
+
Requires-Dist: weaviate-client>=4.6; extra == 'all'
|
|
42
|
+
Provides-Extra: anthropic
|
|
43
|
+
Requires-Dist: anthropic>=0.34; extra == 'anthropic'
|
|
44
|
+
Provides-Extra: chroma
|
|
45
|
+
Requires-Dist: chromadb>=0.5; extra == 'chroma'
|
|
46
|
+
Provides-Extra: dev
|
|
47
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
48
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
49
|
+
Requires-Dist: pytest-cov>=5; extra == 'dev'
|
|
50
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
51
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
52
|
+
Provides-Extra: docs
|
|
53
|
+
Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
|
|
54
|
+
Requires-Dist: mkdocs>=1.6; extra == 'docs'
|
|
55
|
+
Requires-Dist: pymdown-extensions>=10; extra == 'docs'
|
|
56
|
+
Provides-Extra: docx
|
|
57
|
+
Requires-Dist: python-docx>=1.1; extra == 'docx'
|
|
58
|
+
Provides-Extra: local
|
|
59
|
+
Requires-Dist: sentence-transformers>=3.0; extra == 'local'
|
|
60
|
+
Requires-Dist: torch>=2.2; extra == 'local'
|
|
61
|
+
Provides-Extra: ollama
|
|
62
|
+
Provides-Extra: openai
|
|
63
|
+
Requires-Dist: openai>=1.40; extra == 'openai'
|
|
64
|
+
Provides-Extra: pdf
|
|
65
|
+
Requires-Dist: pypdf>=4.0; extra == 'pdf'
|
|
66
|
+
Provides-Extra: pgvector
|
|
67
|
+
Requires-Dist: asyncpg>=0.29; extra == 'pgvector'
|
|
68
|
+
Requires-Dist: pgvector>=0.3; extra == 'pgvector'
|
|
69
|
+
Provides-Extra: qdrant
|
|
70
|
+
Requires-Dist: qdrant-client>=1.10; extra == 'qdrant'
|
|
71
|
+
Provides-Extra: s3
|
|
72
|
+
Requires-Dist: boto3>=1.34; extra == 's3'
|
|
73
|
+
Provides-Extra: sqlitevec
|
|
74
|
+
Requires-Dist: aiosqlite>=0.20; extra == 'sqlitevec'
|
|
75
|
+
Requires-Dist: sqlite-vec>=0.1.3; extra == 'sqlitevec'
|
|
76
|
+
Provides-Extra: weaviate
|
|
77
|
+
Requires-Dist: weaviate-client>=4.6; extra == 'weaviate'
|
|
78
|
+
Description-Content-Type: text/markdown
|
|
79
|
+
|
|
80
|
+
# Raggedy
|
|
81
|
+
|
|
82
|
+
Batteries-included, importable RAG core. `pip install dynamanic-raggedy`, then `from raggedy import ...` — drop it into any Python app.
|
|
83
|
+
|
|
84
|
+
## Status
|
|
85
|
+
|
|
86
|
+
Pre-release (`0.1.0a0`). The library ships no mock providers — every LLM and embedder is a real adapter. Tests skip cleanly when no live LLM is reachable.
|
|
87
|
+
|
|
88
|
+
Phase 1 + intake + real providers shipped: in-memory store, deterministic embedder, semantic chunker, PII redaction (10 types, ephemeral map), cost tracking with daily limits, audit-run records, intake layer (files / directories / URLs / S3 → plain text / markdown / HTML / PDF / DOCX), and LLM adapters for Anthropic, OpenAI, Ollama, and LM Studio plus a GPU-aware local embedder (sentence-transformers, CUDA → MPS → CPU autodetect).
|
|
89
|
+
|
|
90
|
+
## Quick start
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
from raggedy import Raggedy, RaggedyConfig, Document
|
|
94
|
+
|
|
95
|
+
rag = Raggedy(RaggedyConfig(
|
|
96
|
+
llm="anthropic", # or "openai", "ollama", "lm_studio"
|
|
97
|
+
llm_model="claude-haiku-4-5-20251001",
|
|
98
|
+
embedding="sentence-transformers",
|
|
99
|
+
embedding_model="sentence-transformers/all-MiniLM-L6-v2",
|
|
100
|
+
device="auto", # CUDA → MPS → CPU
|
|
101
|
+
store="memory",
|
|
102
|
+
namespace="kb-handbook",
|
|
103
|
+
))
|
|
104
|
+
|
|
105
|
+
rag.ingest_sync([
|
|
106
|
+
Document(id="hb-1", text="Refunds are processed within 5 business days."),
|
|
107
|
+
])
|
|
108
|
+
|
|
109
|
+
result = rag.query_sync("How long do refunds take?", top_k=4)
|
|
110
|
+
print(result.answer)
|
|
111
|
+
for s in result.sources:
|
|
112
|
+
print(f"[Source {s.index}] {s.document_id} score={s.score:.3f}")
|
|
113
|
+
print(f"cost=${result.cost_usd:.6f} run_id={result.run_id}")
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### `.env`
|
|
117
|
+
|
|
118
|
+
`Raggedy()` calls `python-dotenv` once at construction and populates `os.environ` (without overriding existing vars). Drop your keys in a `.env` file at the project root:
|
|
119
|
+
|
|
120
|
+
```
|
|
121
|
+
ANTHROPIC_API_KEY=sk-ant-...
|
|
122
|
+
OPENAI_API_KEY=sk-...
|
|
123
|
+
OLLAMA_HOST=http://localhost:11434
|
|
124
|
+
LMSTUDIO_BASE_URL=http://localhost:1234/v1
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
See `.env.example` for the full list.
|
|
128
|
+
|
|
129
|
+
### Intake
|
|
130
|
+
|
|
131
|
+
`Raggedy.ingest_from(source)` streams documents through extractors before indexing.
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from raggedy.intake import DirectorySource, UrlSource
|
|
135
|
+
|
|
136
|
+
rag.ingest_from_sync(DirectorySource("./docs"), tags={"corpus": "handbook"})
|
|
137
|
+
rag.ingest_from_sync(UrlSource(["https://example.com/blog"]))
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Install
|
|
141
|
+
|
|
142
|
+
The PyPI distribution name is `dynamanic-raggedy` because the short name `raggedy` is already taken by an unrelated, maintained project. The import path is still `raggedy`.
|
|
143
|
+
|
|
144
|
+
```sh
|
|
145
|
+
pip install dynamanic-raggedy # base: deterministic embedder, in-memory store, intake stdlib parsers
|
|
146
|
+
pip install "dynamanic-raggedy[anthropic]" # Anthropic LLM
|
|
147
|
+
pip install "dynamanic-raggedy[openai]" # OpenAI LLM + embeddings; also powers LM Studio (OpenAI-compatible)
|
|
148
|
+
pip install "dynamanic-raggedy[ollama]" # no extra deps — ollama uses base httpx
|
|
149
|
+
pip install "dynamanic-raggedy[local]" # sentence-transformers + torch (GPU-aware)
|
|
150
|
+
pip install "dynamanic-raggedy[pdf]" # PDF extractor (pypdf)
|
|
151
|
+
pip install "dynamanic-raggedy[docx]" # DOCX extractor (python-docx)
|
|
152
|
+
pip install "dynamanic-raggedy[s3]" # S3 source (boto3)
|
|
153
|
+
pip install "dynamanic-raggedy[sqlitevec]" # SQLite-vec store
|
|
154
|
+
pip install "dynamanic-raggedy[pgvector]" # pgvector store
|
|
155
|
+
pip install "dynamanic-raggedy[qdrant]" # Qdrant store
|
|
156
|
+
pip install "dynamanic-raggedy[chroma]" # Chroma store
|
|
157
|
+
pip install "dynamanic-raggedy[weaviate]" # Weaviate store
|
|
158
|
+
pip install "dynamanic-raggedy[all]" # everything
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## License
|
|
162
|
+
|
|
163
|
+
MIT
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Raggedy
|
|
2
|
+
|
|
3
|
+
Batteries-included, importable RAG core. `pip install dynamanic-raggedy`, then `from raggedy import ...` — drop it into any Python app.
|
|
4
|
+
|
|
5
|
+
## Status
|
|
6
|
+
|
|
7
|
+
Pre-release (`0.1.0a0`). The library ships no mock providers — every LLM and embedder is a real adapter. Tests skip cleanly when no live LLM is reachable.
|
|
8
|
+
|
|
9
|
+
Phase 1 + intake + real providers shipped: in-memory store, deterministic embedder, semantic chunker, PII redaction (10 types, ephemeral map), cost tracking with daily limits, audit-run records, intake layer (files / directories / URLs / S3 → plain text / markdown / HTML / PDF / DOCX), and LLM adapters for Anthropic, OpenAI, Ollama, and LM Studio plus a GPU-aware local embedder (sentence-transformers, CUDA → MPS → CPU autodetect).
|
|
10
|
+
|
|
11
|
+
## Quick start
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from raggedy import Raggedy, RaggedyConfig, Document
|
|
15
|
+
|
|
16
|
+
rag = Raggedy(RaggedyConfig(
|
|
17
|
+
llm="anthropic", # or "openai", "ollama", "lm_studio"
|
|
18
|
+
llm_model="claude-haiku-4-5-20251001",
|
|
19
|
+
embedding="sentence-transformers",
|
|
20
|
+
embedding_model="sentence-transformers/all-MiniLM-L6-v2",
|
|
21
|
+
device="auto", # CUDA → MPS → CPU
|
|
22
|
+
store="memory",
|
|
23
|
+
namespace="kb-handbook",
|
|
24
|
+
))
|
|
25
|
+
|
|
26
|
+
rag.ingest_sync([
|
|
27
|
+
Document(id="hb-1", text="Refunds are processed within 5 business days."),
|
|
28
|
+
])
|
|
29
|
+
|
|
30
|
+
result = rag.query_sync("How long do refunds take?", top_k=4)
|
|
31
|
+
print(result.answer)
|
|
32
|
+
for s in result.sources:
|
|
33
|
+
print(f"[Source {s.index}] {s.document_id} score={s.score:.3f}")
|
|
34
|
+
print(f"cost=${result.cost_usd:.6f} run_id={result.run_id}")
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### `.env`
|
|
38
|
+
|
|
39
|
+
`Raggedy()` calls `python-dotenv` once at construction and populates `os.environ` (without overriding existing vars). Drop your keys in a `.env` file at the project root:
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
ANTHROPIC_API_KEY=sk-ant-...
|
|
43
|
+
OPENAI_API_KEY=sk-...
|
|
44
|
+
OLLAMA_HOST=http://localhost:11434
|
|
45
|
+
LMSTUDIO_BASE_URL=http://localhost:1234/v1
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
See `.env.example` for the full list.
|
|
49
|
+
|
|
50
|
+
### Intake
|
|
51
|
+
|
|
52
|
+
`Raggedy.ingest_from(source)` streams documents through extractors before indexing.
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from raggedy.intake import DirectorySource, UrlSource
|
|
56
|
+
|
|
57
|
+
rag.ingest_from_sync(DirectorySource("./docs"), tags={"corpus": "handbook"})
|
|
58
|
+
rag.ingest_from_sync(UrlSource(["https://example.com/blog"]))
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Install
|
|
62
|
+
|
|
63
|
+
The PyPI distribution name is `dynamanic-raggedy` (the short name `raggedy` is taken by an unrelated maintained project). The import path is still `raggedy`.
|
|
64
|
+
|
|
65
|
+
```sh
|
|
66
|
+
pip install dynamanic-raggedy # base: deterministic embedder, in-memory store, intake stdlib parsers
|
|
67
|
+
pip install "dynamanic-raggedy[anthropic]" # Anthropic LLM
|
|
68
|
+
pip install "dynamanic-raggedy[openai]" # OpenAI LLM + embeddings; also powers LM Studio (OpenAI-compatible)
|
|
69
|
+
pip install "dynamanic-raggedy[ollama]" # no extra deps — ollama uses base httpx
|
|
70
|
+
pip install "dynamanic-raggedy[local]" # sentence-transformers + torch (GPU-aware)
|
|
71
|
+
pip install "dynamanic-raggedy[pdf]" # PDF extractor (pypdf)
|
|
72
|
+
pip install "dynamanic-raggedy[docx]" # DOCX extractor (python-docx)
|
|
73
|
+
pip install "dynamanic-raggedy[s3]" # S3 source (boto3)
|
|
74
|
+
pip install "dynamanic-raggedy[sqlitevec]" # SQLite-vec store
|
|
75
|
+
pip install "dynamanic-raggedy[pgvector]" # pgvector store
|
|
76
|
+
pip install "dynamanic-raggedy[qdrant]" # Qdrant store
|
|
77
|
+
pip install "dynamanic-raggedy[chroma]" # Chroma store
|
|
78
|
+
pip install "dynamanic-raggedy[weaviate]" # Weaviate store
|
|
79
|
+
pip install "dynamanic-raggedy[all]" # everything
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## License
|
|
83
|
+
|
|
84
|
+
MIT
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Adapters
|
|
2
|
+
|
|
3
|
+
Every adapter is loaded lazily by name through `RaggedyConfig`. Missing extras surface as `ConfigError` with the exact `pip install` command — never a raw `ImportError`.
|
|
4
|
+
|
|
5
|
+
## Vector stores
|
|
6
|
+
|
|
7
|
+
| `store=` | Module | Extra | URL form | Notes |
|
|
8
|
+
|----------|--------|-------|----------|-------|
|
|
9
|
+
| `memory` | `raggedy.stores.memory.InMemoryStore` | — (numpy only) | n/a | Default. Dict + numpy cosine. |
|
|
10
|
+
| `sqlite-vec` | `raggedy.stores.sqlite_vec.SqliteVecStore`| `[sqlitevec]` | `:memory:` / file path / `sqlite:///…` | Local persistent. vec0 with `distance_metric=cosine`. |
|
|
11
|
+
| `pgvector` | `raggedy.stores.pgvector.PgVectorStore` | `[pgvector]` | `postgresql://…` | JSONB tags + GIN index. |
|
|
12
|
+
| `qdrant` | `raggedy.stores.qdrant.QdrantStore` | `[qdrant]` | `:memory:` / file path / `http(s)://…` | UUIDv5 from chunk id; one collection per Raggedy instance. |
|
|
13
|
+
| `chroma` | `raggedy.stores.chroma.ChromaStore` | `[chroma]` | `:memory:` / file path / `http(s)://…` | `embedding_function=None`; tags flattened to `tag_*` keys. |
|
|
14
|
+
| `weaviate` | `raggedy.stores.weaviate.WeaviateStore` | `[weaviate]` | `http(s)://…` | v4 async client; PascalCase collection. |
|
|
15
|
+
|
|
16
|
+
Shared behaviour:
|
|
17
|
+
|
|
18
|
+
- Cosine similarity throughout. Score = `1 - cosine_distance`.
|
|
19
|
+
- The schema is created lazily on the first `upsert`, once the embedding dimension is known. Subsequent upserts with a different dimension raise `ProviderError`.
|
|
20
|
+
- Every adapter exposes `aclose()` to release pooled connections.
|
|
21
|
+
|
|
22
|
+
## LLM providers
|
|
23
|
+
|
|
24
|
+
| `llm=` | Module | Extra | Notes |
|
|
25
|
+
|--------|--------|-------|-------|
|
|
26
|
+
| `anthropic` | `raggedy.llm.anthropic.AnthropicProvider` | `[anthropic]` | Claude 4.x family. Reads `ANTHROPIC_API_KEY`. |
|
|
27
|
+
| `openai` | `raggedy.llm.openai.OpenAIProvider` | `[openai]` | `count_tokens` uses `tiktoken` when it is installed. |
|
|
28
|
+
| `ollama` | `raggedy.llm.ollama.OllamaProvider` | — (base httpx) | Local. `estimate_cost` always returns 0. |
|
|
29
|
+
| `lm_studio` / `lmstudio` | `raggedy.llm.openai.OpenAIProvider` | `[openai]` | OpenAI-compatible endpoint; reads `LMSTUDIO_BASE_URL`, `LMSTUDIO_API_KEY`. |
|
|
30
|
+
|
|
31
|
+
## Embedding backends
|
|
32
|
+
|
|
33
|
+
| `embedding=` | Module | Extra | Notes |
|
|
34
|
+
|--------------|--------|-------|-------|
|
|
35
|
+
| `deterministic` | `raggedy.embeddings.deterministic.DeterministicEmbedder` | — | Hashed bag-of-words. A real algorithm (not a mock), but it captures no semantic similarity — good for tests/CI. |
|
|
36
|
+
| `sentence-transformers` / `local` | `raggedy.embeddings.local.SentenceTransformersEmbedder` | `[local]` | GPU-aware. `device="auto"` picks CUDA → MPS → CPU. |
|
|
37
|
+
| `openai` | `raggedy.embeddings.openai.OpenAIEmbedder` | `[openai]` | `text-embedding-3-small` default; pass `dimensions=` to truncate. |
|
|
38
|
+
| `ollama` | `raggedy.embeddings.ollama.OllamaEmbedder` | — (base httpx) | Uses `/api/embed` (batch); falls back to `/api/embeddings`. |
|
|
39
|
+
| `lm_studio` / `lmstudio` | `raggedy.embeddings.openai.OpenAIEmbedder` | `[openai]` | Reuses the OpenAI client pointed at LM Studio. |
|
|
40
|
+
|
|
41
|
+
## Chunkers
|
|
42
|
+
|
|
43
|
+
| `chunker_strategy=` | Module | Notes |
|
|
44
|
+
|---------------------|--------|-------|
|
|
45
|
+
| `semantic` | `raggedy.chunking.semantic.SemanticChunker` | Default. Paragraph + sentence boundaries, greedy merge, leading overlap. |
|
|
46
|
+
| `paragraph` | `raggedy.chunking.paragraph.ParagraphChunker` | One chunk per paragraph (no merging). |
|
|
47
|
+
| `sentence` | `raggedy.chunking.sentence.SentenceChunker` | Sentence-level greedy merge to `target_tokens`. |
|
|
48
|
+
| `fixed` | `raggedy.chunking.fixed.FixedChunker` | Fixed window with overlap; word-boundary snap. |
|
|
49
|
+
|
|
50
|
+
For a custom chunker, write any object satisfying the `Chunker` protocol and pass `chunker=` to `Raggedy(...)`. See [example 04](https://github.com/Dynamanic/raggedy/blob/main/examples/04_custom_chunker.py).
|
|
51
|
+
|
|
52
|
+
## Persistence
|
|
53
|
+
|
|
54
|
+
| `recorder=` / `cost_backend=` | Module | Extra |
|
|
55
|
+
|---|---|---|
|
|
56
|
+
| `recorder=memory` | `raggedy.recording.memory.InMemoryRunRecorder` | — |
|
|
57
|
+
| `recorder=sqlite` | `raggedy.recording.sqlite.SqliteRunRecorder` | `[sqlitevec]` |
|
|
58
|
+
| `cost_backend=memory` | `raggedy.cost.tracker.InMemoryCostTracker` | — |
|
|
59
|
+
| `cost_backend=sqlite` | `raggedy.cost.sqlite.SqliteCostTracker` | `[sqlitevec]` |
|
|
60
|
+
|
|
61
|
+
The `SqliteCostTracker` uses an atomic `INSERT … ON CONFLICT DO UPDATE` to keep concurrent writers correct without an app-level lock.
|