dynamanic-raggedy 0.1.0a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. dynamanic_raggedy-0.1.0a0/.env.example +22 -0
  2. dynamanic_raggedy-0.1.0a0/.github/workflows/ci.yml +119 -0
  3. dynamanic_raggedy-0.1.0a0/.github/workflows/docs.yml +43 -0
  4. dynamanic_raggedy-0.1.0a0/.gitignore +23 -0
  5. dynamanic_raggedy-0.1.0a0/.python-version +1 -0
  6. dynamanic_raggedy-0.1.0a0/CHANGELOG.md +65 -0
  7. dynamanic_raggedy-0.1.0a0/LICENSE +21 -0
  8. dynamanic_raggedy-0.1.0a0/PKG-INFO +163 -0
  9. dynamanic_raggedy-0.1.0a0/README.md +84 -0
  10. dynamanic_raggedy-0.1.0a0/docs/adapters.md +61 -0
  11. dynamanic_raggedy-0.1.0a0/docs/api-reference.md +166 -0
  12. dynamanic_raggedy-0.1.0a0/docs/concepts.md +93 -0
  13. dynamanic_raggedy-0.1.0a0/docs/configuration.md +127 -0
  14. dynamanic_raggedy-0.1.0a0/docs/index.md +40 -0
  15. dynamanic_raggedy-0.1.0a0/docs/intake.md +76 -0
  16. dynamanic_raggedy-0.1.0a0/docs/quickstart.md +93 -0
  17. dynamanic_raggedy-0.1.0a0/examples/01_hello_world.py +106 -0
  18. dynamanic_raggedy-0.1.0a0/examples/02_pgvector_anthropic.py +68 -0
  19. dynamanic_raggedy-0.1.0a0/examples/03_local_ollama.py +59 -0
  20. dynamanic_raggedy-0.1.0a0/examples/04_custom_chunker.py +110 -0
  21. dynamanic_raggedy-0.1.0a0/examples/05_intake_directory.py +67 -0
  22. dynamanic_raggedy-0.1.0a0/examples/06_lm_studio_gpu.py +56 -0
  23. dynamanic_raggedy-0.1.0a0/examples/examples_helpers.py +40 -0
  24. dynamanic_raggedy-0.1.0a0/mkdocs.yml +54 -0
  25. dynamanic_raggedy-0.1.0a0/pyproject.toml +178 -0
  26. dynamanic_raggedy-0.1.0a0/src/raggedy/__init__.py +70 -0
  27. dynamanic_raggedy-0.1.0a0/src/raggedy/_version.py +1 -0
  28. dynamanic_raggedy-0.1.0a0/src/raggedy/chunking/__init__.py +6 -0
  29. dynamanic_raggedy-0.1.0a0/src/raggedy/chunking/fixed.py +81 -0
  30. dynamanic_raggedy-0.1.0a0/src/raggedy/chunking/paragraph.py +70 -0
  31. dynamanic_raggedy-0.1.0a0/src/raggedy/chunking/semantic.py +149 -0
  32. dynamanic_raggedy-0.1.0a0/src/raggedy/chunking/sentence.py +101 -0
  33. dynamanic_raggedy-0.1.0a0/src/raggedy/config.py +117 -0
  34. dynamanic_raggedy-0.1.0a0/src/raggedy/cost/__init__.py +7 -0
  35. dynamanic_raggedy-0.1.0a0/src/raggedy/cost/pricing.py +36 -0
  36. dynamanic_raggedy-0.1.0a0/src/raggedy/cost/sqlite.py +159 -0
  37. dynamanic_raggedy-0.1.0a0/src/raggedy/cost/tracker.py +75 -0
  38. dynamanic_raggedy-0.1.0a0/src/raggedy/embeddings/__init__.py +3 -0
  39. dynamanic_raggedy-0.1.0a0/src/raggedy/embeddings/deterministic.py +55 -0
  40. dynamanic_raggedy-0.1.0a0/src/raggedy/embeddings/local.py +103 -0
  41. dynamanic_raggedy-0.1.0a0/src/raggedy/embeddings/ollama.py +82 -0
  42. dynamanic_raggedy-0.1.0a0/src/raggedy/embeddings/openai.py +65 -0
  43. dynamanic_raggedy-0.1.0a0/src/raggedy/errors.py +34 -0
  44. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/__init__.py +37 -0
  45. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/extractors/__init__.py +10 -0
  46. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/extractors/docx.py +44 -0
  47. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/extractors/html.py +64 -0
  48. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/extractors/pdf.py +44 -0
  49. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/extractors/plaintext.py +33 -0
  50. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/extractors/registry.py +103 -0
  51. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/pipeline.py +58 -0
  52. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/protocols.py +28 -0
  53. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/sources/__init__.py +5 -0
  54. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/sources/files.py +100 -0
  55. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/sources/memory.py +19 -0
  56. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/sources/s3.py +83 -0
  57. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/sources/url.py +71 -0
  58. dynamanic_raggedy-0.1.0a0/src/raggedy/intake/types.py +23 -0
  59. dynamanic_raggedy-0.1.0a0/src/raggedy/llm/__init__.py +3 -0
  60. dynamanic_raggedy-0.1.0a0/src/raggedy/llm/anthropic.py +90 -0
  61. dynamanic_raggedy-0.1.0a0/src/raggedy/llm/ollama.py +99 -0
  62. dynamanic_raggedy-0.1.0a0/src/raggedy/llm/openai.py +112 -0
  63. dynamanic_raggedy-0.1.0a0/src/raggedy/llm/registry.py +87 -0
  64. dynamanic_raggedy-0.1.0a0/src/raggedy/obs/__init__.py +3 -0
  65. dynamanic_raggedy-0.1.0a0/src/raggedy/obs/default.py +33 -0
  66. dynamanic_raggedy-0.1.0a0/src/raggedy/pii/__init__.py +4 -0
  67. dynamanic_raggedy-0.1.0a0/src/raggedy/pii/noop.py +22 -0
  68. dynamanic_raggedy-0.1.0a0/src/raggedy/pii/patterns.py +107 -0
  69. dynamanic_raggedy-0.1.0a0/src/raggedy/pii/redactor.py +99 -0
  70. dynamanic_raggedy-0.1.0a0/src/raggedy/pipeline.py +816 -0
  71. dynamanic_raggedy-0.1.0a0/src/raggedy/prompts/__init__.py +3 -0
  72. dynamanic_raggedy-0.1.0a0/src/raggedy/prompts/templates.py +45 -0
  73. dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/__init__.py +21 -0
  74. dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/chunker.py +14 -0
  75. dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/cost.py +18 -0
  76. dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/embedding.py +18 -0
  77. dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/events.py +16 -0
  78. dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/llm.py +28 -0
  79. dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/pii.py +14 -0
  80. dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/recorder.py +41 -0
  81. dynamanic_raggedy-0.1.0a0/src/raggedy/protocols/store.py +30 -0
  82. dynamanic_raggedy-0.1.0a0/src/raggedy/recording/__init__.py +6 -0
  83. dynamanic_raggedy-0.1.0a0/src/raggedy/recording/memory.py +101 -0
  84. dynamanic_raggedy-0.1.0a0/src/raggedy/recording/sqlite.py +317 -0
  85. dynamanic_raggedy-0.1.0a0/src/raggedy/stores/__init__.py +9 -0
  86. dynamanic_raggedy-0.1.0a0/src/raggedy/stores/chroma.py +349 -0
  87. dynamanic_raggedy-0.1.0a0/src/raggedy/stores/memory.py +93 -0
  88. dynamanic_raggedy-0.1.0a0/src/raggedy/stores/pgvector.py +330 -0
  89. dynamanic_raggedy-0.1.0a0/src/raggedy/stores/qdrant.py +338 -0
  90. dynamanic_raggedy-0.1.0a0/src/raggedy/stores/sqlite_vec.py +331 -0
  91. dynamanic_raggedy-0.1.0a0/src/raggedy/stores/weaviate.py +352 -0
  92. dynamanic_raggedy-0.1.0a0/src/raggedy/types.py +189 -0
  93. dynamanic_raggedy-0.1.0a0/src/raggedy/util/__init__.py +5 -0
  94. dynamanic_raggedy-0.1.0a0/src/raggedy/util/ids.py +13 -0
  95. dynamanic_raggedy-0.1.0a0/src/raggedy/util/sync.py +14 -0
  96. dynamanic_raggedy-0.1.0a0/src/raggedy/util/tokens.py +15 -0
  97. dynamanic_raggedy-0.1.0a0/tests/conftest.py +138 -0
  98. dynamanic_raggedy-0.1.0a0/tests/intake/__init__.py +0 -0
  99. dynamanic_raggedy-0.1.0a0/tests/intake/test_extractors.py +50 -0
  100. dynamanic_raggedy-0.1.0a0/tests/intake/test_ingest_from.py +64 -0
  101. dynamanic_raggedy-0.1.0a0/tests/intake/test_sources.py +101 -0
  102. dynamanic_raggedy-0.1.0a0/tests/intake/test_url_source.py +76 -0
  103. dynamanic_raggedy-0.1.0a0/tests/stores/__init__.py +0 -0
  104. dynamanic_raggedy-0.1.0a0/tests/stores/test_chroma.py +156 -0
  105. dynamanic_raggedy-0.1.0a0/tests/stores/test_memory.py +133 -0
  106. dynamanic_raggedy-0.1.0a0/tests/stores/test_pgvector.py +140 -0
  107. dynamanic_raggedy-0.1.0a0/tests/stores/test_qdrant.py +170 -0
  108. dynamanic_raggedy-0.1.0a0/tests/stores/test_sqlite_vec.py +161 -0
  109. dynamanic_raggedy-0.1.0a0/tests/stores/test_weaviate.py +142 -0
  110. dynamanic_raggedy-0.1.0a0/tests/test_chunkers.py +102 -0
  111. dynamanic_raggedy-0.1.0a0/tests/test_chunking.py +43 -0
  112. dynamanic_raggedy-0.1.0a0/tests/test_config_yaml.py +71 -0
  113. dynamanic_raggedy-0.1.0a0/tests/test_pii.py +48 -0
  114. dynamanic_raggedy-0.1.0a0/tests/test_pipeline.py +105 -0
  115. dynamanic_raggedy-0.1.0a0/tests/test_pipeline_errors.py +80 -0
  116. dynamanic_raggedy-0.1.0a0/tests/test_sqlite_cost.py +101 -0
  117. dynamanic_raggedy-0.1.0a0/tests/test_sqlite_recorder.py +134 -0
@@ -0,0 +1,22 @@
1
+ # Copy to .env and uncomment any keys you want Raggedy to pick up.
2
+ # `Raggedy()` calls python-dotenv at construction time and populates
3
+ # os.environ (without overriding existing vars), so provider SDKs
4
+ # (anthropic, openai) read these directly.
5
+
6
+ # --- LLM providers ----------------------------------------------------
7
+ # ANTHROPIC_API_KEY=
8
+ # OPENAI_API_KEY=
9
+ # OPENAI_ORG=
10
+ # OLLAMA_HOST=http://localhost:11434
11
+ # LMSTUDIO_BASE_URL=http://localhost:1234/v1
12
+ # LMSTUDIO_API_KEY=lm-studio
13
+
14
+ # --- Raggedy adapter selection (any RAGGEDY_* setting in config.py) ---
15
+ # RAGGEDY_LLM=anthropic
16
+ # RAGGEDY_LLM_MODEL=claude-haiku-4-5-20251001
17
+ # RAGGEDY_EMBEDDING=sentence-transformers
18
+ # RAGGEDY_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
19
+ # RAGGEDY_DEVICE=auto # auto | cuda | mps | cpu
20
+ # RAGGEDY_STORE=memory
21
+ # RAGGEDY_NAMESPACE=default
22
+ # RAGGEDY_COST_DAILY_LIMIT_USD=1.00
@@ -0,0 +1,119 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ concurrency:
10
+ group: ci-${{ github.ref }}
11
+ cancel-in-progress: true
12
+
13
+ jobs:
14
+ lint-and-type:
15
+ name: Lint + types (py3.12)
16
+ runs-on: ubuntu-latest
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+ - uses: actions/setup-python@v5
20
+ with:
21
+ python-version: "3.12"
22
+ cache: pip
23
+ - name: Install
24
+ run: |
25
+ python -m pip install --upgrade pip
26
+ pip install -e ".[dev]"
27
+ - name: Ruff
28
+ run: python -m ruff check src tests examples
29
+ - name: Mypy
30
+ run: python -m mypy src
31
+
32
+ test:
33
+ name: pytest (py${{ matrix.python }})
34
+ runs-on: ubuntu-latest
35
+ strategy:
36
+ fail-fast: false
37
+ matrix:
38
+ python: ["3.11", "3.12", "3.13"]
39
+ steps:
40
+ - uses: actions/checkout@v4
41
+ - uses: actions/setup-python@v5
42
+ with:
43
+ python-version: ${{ matrix.python }}
44
+ cache: pip
45
+ - name: Install base + dev
46
+ run: |
47
+ python -m pip install --upgrade pip
48
+ pip install -e ".[dev]"
49
+ - name: Run tests (no coverage gate — adapter modules require extras)
50
+ run: python -m pytest -q --no-header
51
+
52
+ test-with-extras:
53
+ name: pytest + coverage (sqlite-vec + chroma + qdrant)
54
+ runs-on: ubuntu-latest
55
+ steps:
56
+ - uses: actions/checkout@v4
57
+ - uses: actions/setup-python@v5
58
+ with:
59
+ python-version: "3.12"
60
+ cache: pip
61
+ - name: Install extras
62
+ run: |
63
+ python -m pip install --upgrade pip
64
+ pip install -e ".[dev,sqlitevec,chroma,qdrant]"
65
+ - name: Run tests with coverage gate
66
+ run: |
67
+ python -m pytest -q \
68
+ --cov=raggedy \
69
+ --cov-report=term-missing \
70
+ --cov-report=xml
71
+ - name: Upload coverage artifact
72
+ uses: actions/upload-artifact@v4
73
+ with:
74
+ name: coverage-xml
75
+ path: coverage.xml
76
+
77
+ pgvector-integration:
78
+ name: pgvector integration
79
+ runs-on: ubuntu-latest
80
+ services:
81
+ postgres:
82
+ image: ankane/pgvector
83
+ env:
84
+ POSTGRES_PASSWORD: pg
85
+ ports:
86
+ - 5432:5432
87
+ options: >-
88
+ --health-cmd="pg_isready -U postgres"
89
+ --health-interval=5s --health-timeout=5s --health-retries=10
90
+ env:
91
+ RAGGEDY_TEST_PG_URL: postgresql://postgres:pg@localhost:5432/postgres
92
+ steps:
93
+ - uses: actions/checkout@v4
94
+ - uses: actions/setup-python@v5
95
+ with:
96
+ python-version: "3.12"
97
+ cache: pip
98
+ - name: Install
99
+ run: |
100
+ python -m pip install --upgrade pip
101
+ pip install -e ".[dev,pgvector]"
102
+ - name: pgvector tests
103
+ run: python -m pytest -q tests/stores/test_pgvector.py
104
+
105
+ docs-build:
106
+ name: docs build
107
+ runs-on: ubuntu-latest
108
+ steps:
109
+ - uses: actions/checkout@v4
110
+ - uses: actions/setup-python@v5
111
+ with:
112
+ python-version: "3.12"
113
+ cache: pip
114
+ - name: Install
115
+ run: |
116
+ python -m pip install --upgrade pip
117
+ pip install -e ".[docs]"
118
+ - name: mkdocs build (strict)
119
+ run: python -m mkdocs build --strict
@@ -0,0 +1,43 @@
1
+ name: Docs
2
+
3
+ # Pages must be enabled in repo Settings → Pages (source: GitHub Actions)
4
+ # before this workflow can deploy. Run manually via `gh workflow run docs.yml`
5
+ # once Pages is set up; auto-deploy on push is intentionally disabled to
6
+ # avoid red CI on repos where Pages isn't configured yet.
7
+ on:
8
+ workflow_dispatch:
9
+
10
+ permissions:
11
+ contents: read
12
+ pages: write
13
+ id-token: write
14
+
15
+ concurrency:
16
+ group: docs-${{ github.ref }}
17
+ cancel-in-progress: true
18
+
19
+ jobs:
20
+ deploy:
21
+ name: build & deploy
22
+ runs-on: ubuntu-latest
23
+ environment:
24
+ name: github-pages
25
+ url: ${{ steps.deployment.outputs.page_url }}
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ - uses: actions/setup-python@v5
29
+ with:
30
+ python-version: "3.12"
31
+ cache: pip
32
+ - name: Install
33
+ run: |
34
+ python -m pip install --upgrade pip
35
+ pip install -e ".[docs]"
36
+ - name: Build site
37
+ run: python -m mkdocs build --strict --site-dir _site
38
+ - uses: actions/configure-pages@v5
39
+ - uses: actions/upload-pages-artifact@v3
40
+ with:
41
+ path: _site
42
+ - id: deployment
43
+ uses: actions/deploy-pages@v4
@@ -0,0 +1,23 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ *.egg
5
+ .venv/
6
+ .env
7
+ .envrc
8
+ build/
9
+ dist/
10
+ .pytest_cache/
11
+ .mypy_cache/
12
+ .ruff_cache/
13
+ .coverage
14
+ coverage.xml
15
+ htmlcov/
16
+ *.sqlite
17
+ *.db
18
+ .DS_Store
19
+ .idea/
20
+ .vscode/
21
+
22
+ # mkdocs build output
23
+ site/
@@ -0,0 +1 @@
1
+ 3.11
@@ -0,0 +1,65 @@
1
+ # Changelog
2
+
3
+ All notable changes to Raggedy are documented here. The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
4
+
5
+ ## [Unreleased]
6
+
7
+ ## [0.1.0a0] — 2026-05-19
8
+
9
+ Initial alpha release.
10
+
11
+ ### Added
12
+
13
+ #### Core pipeline
14
+ - `Raggedy` facade and `RAGPipeline` async orchestrator (14-step flow, in outline: validate → start run → cost-gate → PII-redact → embed → retrieve → re-redact → pack → render prompt → LLM → record usage → finish run → return).
15
+ - Eight Protocols for full adapter swappability: `VectorStore`, `EmbeddingBackend`, `LLMProvider`, `Chunker`, `PIIRedactor`, `CostTracker`, `RunRecorder`, `Logger`/`EventSink`.
16
+ - Domain-neutral isolation: single `namespace: str` plus free-form `tags: dict[str, str]`. No tenant / matter / legal-specific concepts.
17
+ - `RaggedyConfig` (pydantic-settings) with `RAGGEDY_*` env prefix, `.env` auto-load via python-dotenv, and `from_yaml(path)` loader.
18
+
19
+ #### Vector stores
20
+ - `InMemoryStore` — numpy cosine, namespace + tag filter.
21
+ - `SqliteVecStore` (`[sqlitevec]`) — `sqlite-vec` extension, `distance_metric=cosine`, in-memory and file-backed.
22
+ - `PgVectorStore` (`[pgvector]`) — asyncpg + pgvector; JSONB tags with GIN index.
23
+ - `QdrantStore` (`[qdrant]`) — UUIDv5 from chunk id; supports embedded `:memory:` and HTTP/HTTPS endpoints.
24
+ - `ChromaStore` (`[chroma]`) — EphemeralClient / PersistentClient / HttpClient; primitive metadata flattening.
25
+ - `WeaviateStore` (`[weaviate]`) — v4 async client; PascalCase collection.
26
+
27
+ #### LLM providers
28
+ - `AnthropicProvider` (`[anthropic]`) — Claude 4.x family.
29
+ - `OpenAIProvider` (`[openai]`) — tiktoken-aware `count_tokens` when installed.
30
+ - `OllamaProvider` — uses base httpx; local zero-cost.
31
+ - LM Studio dispatched through `OpenAIProvider` against `LMSTUDIO_BASE_URL`.
32
+
33
+ #### Embedding backends
34
+ - `DeterministicEmbedder` — hashed bag-of-words (real algorithm, useful for tests).
35
+ - `SentenceTransformersEmbedder` (`[local]`) — GPU-aware via `detect_device()` (CUDA → MPS → CPU); honours `embedding_batch_size`.
36
+ - `OpenAIEmbedder` (`[openai]`).
37
+ - `OllamaEmbedder` — uses base httpx; batch `/api/embed` with single-shot fallback.
38
+
39
+ #### Chunkers
40
+ - `SemanticChunker` — paragraph + sentence boundaries, greedy merge, leading overlap, char-offset tracking.
41
+ - `ParagraphChunker` — one chunk per paragraph.
42
+ - `SentenceChunker` — sentence-level greedy merge.
43
+ - `FixedChunker` — fixed-window with word-boundary snap.
44
+
45
+ #### Intake layer
46
+ - Sources: `MemorySource`, `FileSource`, `DirectorySource`, `UrlSource` (base), `S3Source` (`[s3]`).
47
+ - Extractors: `PlainTextExtractor`, `HtmlExtractor` (stdlib only), `PdfExtractor` (`[pdf]`), `DocxExtractor` (`[docx]`).
48
+ - `IntakeRunner` orchestrates source → extractor → `Document`. `ExtractorRegistry` dispatches by `content_type`.
49
+ - `Raggedy.ingest_from(source)` one-liner with batching.
50
+
51
+ #### Batteries
52
+ - `RegexPIIRedactor` — 10 PII types (SSN, email, phone, DOB, address, credit-card with Luhn, IP, passport, driver's license, bank account). `RedactionResult.redaction_map` is in-memory only; never persisted.
53
+ - `InMemoryCostTracker` and `SqliteCostTracker` (atomic daily upsert via `INSERT … ON CONFLICT DO UPDATE`).
54
+ - `InMemoryRunRecorder` (bounded deque) and `SqliteRunRecorder` (FK-cascade artifacts, durable).
55
+ - Versioned RAG prompt templates pinned in every run record (`prompt_template_version`).
56
+
57
+ #### Tests, examples, docs
58
+ - 73+ tests; the full suite runs offline; pgvector / Weaviate / live-LLM tests are skipped when the backing service is unavailable.
59
+ - 6 worked examples (hello world, pgvector + Anthropic, local Ollama, custom chunker, intake directory, LM Studio GPU).
60
+ - mkdocs-material site (`docs/`).
61
+
62
+ ### Design constraints
63
+ - No mock LLM providers ship with the library; tests use real providers with skip-if-unavailable.
64
+ - Async-first; sync shims via `asgiref.async_to_sync`.
65
+ - All external calls are wrapped so that failures surface as `ProviderError` / `ConfigError`. The run record is always finalised with the appropriate status on every code path.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Dynamanic LLC
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,163 @@
1
+ Metadata-Version: 2.4
2
+ Name: dynamanic-raggedy
3
+ Version: 0.1.0a0
4
+ Summary: Batteries-included, importable RAG core.
5
+ Project-URL: Homepage, https://github.com/Dynamanic/raggedy
6
+ Project-URL: Repository, https://github.com/Dynamanic/raggedy
7
+ Author-email: Mike Jackson <dynamanicllc@gmail.com>
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: embeddings,llm,rag,retrieval,vector-search
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.11
19
+ Requires-Dist: asgiref>=3.8
20
+ Requires-Dist: httpx>=0.27
21
+ Requires-Dist: numpy>=1.26
22
+ Requires-Dist: pydantic-settings>=2.3
23
+ Requires-Dist: pydantic>=2.7
24
+ Requires-Dist: python-dotenv>=1.0
25
+ Requires-Dist: python-ulid>=2.7
26
+ Requires-Dist: pyyaml>=6.0
27
+ Provides-Extra: all
28
+ Requires-Dist: aiosqlite>=0.20; extra == 'all'
29
+ Requires-Dist: anthropic>=0.34; extra == 'all'
30
+ Requires-Dist: asyncpg>=0.29; extra == 'all'
31
+ Requires-Dist: boto3>=1.34; extra == 'all'
32
+ Requires-Dist: chromadb>=0.5; extra == 'all'
33
+ Requires-Dist: openai>=1.40; extra == 'all'
34
+ Requires-Dist: pgvector>=0.3; extra == 'all'
35
+ Requires-Dist: pypdf>=4.0; extra == 'all'
36
+ Requires-Dist: python-docx>=1.1; extra == 'all'
37
+ Requires-Dist: qdrant-client>=1.10; extra == 'all'
38
+ Requires-Dist: sentence-transformers>=3.0; extra == 'all'
39
+ Requires-Dist: sqlite-vec>=0.1.3; extra == 'all'
40
+ Requires-Dist: torch>=2.2; extra == 'all'
41
+ Requires-Dist: weaviate-client>=4.6; extra == 'all'
42
+ Provides-Extra: anthropic
43
+ Requires-Dist: anthropic>=0.34; extra == 'anthropic'
44
+ Provides-Extra: chroma
45
+ Requires-Dist: chromadb>=0.5; extra == 'chroma'
46
+ Provides-Extra: dev
47
+ Requires-Dist: mypy>=1.10; extra == 'dev'
48
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
49
+ Requires-Dist: pytest-cov>=5; extra == 'dev'
50
+ Requires-Dist: pytest>=8; extra == 'dev'
51
+ Requires-Dist: ruff>=0.5; extra == 'dev'
52
+ Provides-Extra: docs
53
+ Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
54
+ Requires-Dist: mkdocs>=1.6; extra == 'docs'
55
+ Requires-Dist: pymdown-extensions>=10; extra == 'docs'
56
+ Provides-Extra: docx
57
+ Requires-Dist: python-docx>=1.1; extra == 'docx'
58
+ Provides-Extra: local
59
+ Requires-Dist: sentence-transformers>=3.0; extra == 'local'
60
+ Requires-Dist: torch>=2.2; extra == 'local'
61
+ Provides-Extra: ollama
62
+ Provides-Extra: openai
63
+ Requires-Dist: openai>=1.40; extra == 'openai'
64
+ Provides-Extra: pdf
65
+ Requires-Dist: pypdf>=4.0; extra == 'pdf'
66
+ Provides-Extra: pgvector
67
+ Requires-Dist: asyncpg>=0.29; extra == 'pgvector'
68
+ Requires-Dist: pgvector>=0.3; extra == 'pgvector'
69
+ Provides-Extra: qdrant
70
+ Requires-Dist: qdrant-client>=1.10; extra == 'qdrant'
71
+ Provides-Extra: s3
72
+ Requires-Dist: boto3>=1.34; extra == 's3'
73
+ Provides-Extra: sqlitevec
74
+ Requires-Dist: aiosqlite>=0.20; extra == 'sqlitevec'
75
+ Requires-Dist: sqlite-vec>=0.1.3; extra == 'sqlitevec'
76
+ Provides-Extra: weaviate
77
+ Requires-Dist: weaviate-client>=4.6; extra == 'weaviate'
78
+ Description-Content-Type: text/markdown
79
+
80
+ # Raggedy
81
+
82
+ Batteries-included, importable RAG core. `pip install dynamanic-raggedy`, then `from raggedy import ...` — drop it into any Python app.
83
+
84
+ ## Status
85
+
86
+ Pre-release (`0.1.0a0`). The library ships no mock providers — every LLM and embedder is a real adapter. Tests skip cleanly when no live LLM is reachable.
87
+
88
+ Phase 1 + intake + real providers shipped: in-memory store, deterministic embedder, semantic chunker, PII redaction (10 types, ephemeral map), cost tracking with daily limits, audit-run records, intake layer (files / directories / URLs / S3 → plain text / markdown / HTML / PDF / DOCX), and LLM adapters for Anthropic, OpenAI, Ollama, and LM Studio plus a GPU-aware local embedder (sentence-transformers, CUDA → MPS → CPU autodetect).
89
+
90
+ ## Quick start
91
+
92
+ ```python
93
+ from raggedy import Raggedy, RaggedyConfig, Document
94
+
95
+ rag = Raggedy(RaggedyConfig(
96
+ llm="anthropic", # or "openai", "ollama", "lm_studio"
97
+ llm_model="claude-haiku-4-5-20251001",
98
+ embedding="sentence-transformers",
99
+ embedding_model="sentence-transformers/all-MiniLM-L6-v2",
100
+ device="auto", # CUDA → MPS → CPU
101
+ store="memory",
102
+ namespace="kb-handbook",
103
+ ))
104
+
105
+ rag.ingest_sync([
106
+ Document(id="hb-1", text="Refunds are processed within 5 business days."),
107
+ ])
108
+
109
+ result = rag.query_sync("How long do refunds take?", top_k=4)
110
+ print(result.answer)
111
+ for s in result.sources:
112
+ print(f"[Source {s.index}] {s.document_id} score={s.score:.3f}")
113
+ print(f"cost=${result.cost_usd:.6f} run_id={result.run_id}")
114
+ ```
115
+
116
+ ### `.env`
117
+
118
+ `Raggedy()` calls `python-dotenv` once at construction and populates `os.environ` (without overriding existing vars). Drop your keys in a `.env` file at the project root:
119
+
120
+ ```
121
+ ANTHROPIC_API_KEY=sk-ant-...
122
+ OPENAI_API_KEY=sk-...
123
+ OLLAMA_HOST=http://localhost:11434
124
+ LMSTUDIO_BASE_URL=http://localhost:1234/v1
125
+ ```
126
+
127
+ See `.env.example` for the full list.
128
+
129
+ ### Intake
130
+
131
+ `Raggedy.ingest_from(source)` streams documents through extractors before indexing. The example below uses the synchronous variant, `ingest_from_sync`.
132
+
133
+ ```python
134
+ from raggedy.intake import DirectorySource, UrlSource
135
+
136
+ rag.ingest_from_sync(DirectorySource("./docs"), tags={"corpus": "handbook"})
137
+ rag.ingest_from_sync(UrlSource(["https://example.com/blog"]))
138
+ ```
139
+
140
+ ## Install
141
+
142
+ The PyPI distribution name is `dynamanic-raggedy` (the short name `raggedy` is taken by an unrelated maintained project). The import path is still `raggedy`.
143
+
144
+ ```sh
145
+ pip install dynamanic-raggedy # base: deterministic embedder, in-memory store, intake stdlib parsers
146
+ pip install "dynamanic-raggedy[anthropic]" # Anthropic LLM
147
+ pip install "dynamanic-raggedy[openai]" # OpenAI LLM + embeddings; also powers LM Studio (OpenAI-compatible)
148
+ pip install "dynamanic-raggedy[ollama]" # no extra deps — ollama uses base httpx
149
+ pip install "dynamanic-raggedy[local]" # sentence-transformers + torch (GPU-aware)
150
+ pip install "dynamanic-raggedy[pdf]" # PDF extractor (pypdf)
151
+ pip install "dynamanic-raggedy[docx]" # DOCX extractor (python-docx)
152
+ pip install "dynamanic-raggedy[s3]" # S3 source (boto3)
153
+ pip install "dynamanic-raggedy[sqlitevec]" # SQLite-vec store
154
+ pip install "dynamanic-raggedy[pgvector]" # pgvector store
155
+ pip install "dynamanic-raggedy[qdrant]" # Qdrant store
156
+ pip install "dynamanic-raggedy[chroma]" # Chroma store
157
+ pip install "dynamanic-raggedy[weaviate]" # Weaviate store
158
+ pip install "dynamanic-raggedy[all]" # everything
159
+ ```
160
+
161
+ ## License
162
+
163
+ MIT
@@ -0,0 +1,84 @@
1
+ # Raggedy
2
+
3
+ Batteries-included, importable RAG core. `pip install dynamanic-raggedy`, then `from raggedy import ...` — drop it into any Python app.
4
+
5
+ ## Status
6
+
7
+ Pre-release (`0.1.0a0`). The library ships no mock providers — every LLM and embedder is a real adapter. Tests skip cleanly when no live LLM is reachable.
8
+
9
+ Phase 1 + intake + real providers shipped: in-memory store, deterministic embedder, semantic chunker, PII redaction (10 types, ephemeral map), cost tracking with daily limits, audit-run records, intake layer (files / directories / URLs / S3 → plain text / markdown / HTML / PDF / DOCX), and LLM adapters for Anthropic, OpenAI, Ollama, and LM Studio plus a GPU-aware local embedder (sentence-transformers, CUDA → MPS → CPU autodetect).
10
+
11
+ ## Quick start
12
+
13
+ ```python
14
+ from raggedy import Raggedy, RaggedyConfig, Document
15
+
16
+ rag = Raggedy(RaggedyConfig(
17
+ llm="anthropic", # or "openai", "ollama", "lm_studio"
18
+ llm_model="claude-haiku-4-5-20251001",
19
+ embedding="sentence-transformers",
20
+ embedding_model="sentence-transformers/all-MiniLM-L6-v2",
21
+ device="auto", # CUDA → MPS → CPU
22
+ store="memory",
23
+ namespace="kb-handbook",
24
+ ))
25
+
26
+ rag.ingest_sync([
27
+ Document(id="hb-1", text="Refunds are processed within 5 business days."),
28
+ ])
29
+
30
+ result = rag.query_sync("How long do refunds take?", top_k=4)
31
+ print(result.answer)
32
+ for s in result.sources:
33
+ print(f"[Source {s.index}] {s.document_id} score={s.score:.3f}")
34
+ print(f"cost=${result.cost_usd:.6f} run_id={result.run_id}")
35
+ ```
36
+
37
+ ### `.env`
38
+
39
+ `Raggedy()` calls `python-dotenv` once at construction and populates `os.environ` (without overriding existing vars). Drop your keys in a `.env` file at the project root:
40
+
41
+ ```
42
+ ANTHROPIC_API_KEY=sk-ant-...
43
+ OPENAI_API_KEY=sk-...
44
+ OLLAMA_HOST=http://localhost:11434
45
+ LMSTUDIO_BASE_URL=http://localhost:1234/v1
46
+ ```
47
+
48
+ See `.env.example` for the full list.
49
+
50
+ ### Intake
51
+
52
+ `Raggedy.ingest_from(source)` streams documents through extractors before indexing. The example below uses the synchronous variant, `ingest_from_sync`.
53
+
54
+ ```python
55
+ from raggedy.intake import DirectorySource, UrlSource
56
+
57
+ rag.ingest_from_sync(DirectorySource("./docs"), tags={"corpus": "handbook"})
58
+ rag.ingest_from_sync(UrlSource(["https://example.com/blog"]))
59
+ ```
60
+
61
+ ## Install
62
+
63
+ The PyPI distribution name is `dynamanic-raggedy` (the short name `raggedy` is taken by an unrelated maintained project). The import path is still `raggedy`.
64
+
65
+ ```sh
66
+ pip install dynamanic-raggedy # base: deterministic embedder, in-memory store, intake stdlib parsers
67
+ pip install "dynamanic-raggedy[anthropic]" # Anthropic LLM
68
+ pip install "dynamanic-raggedy[openai]" # OpenAI LLM + embeddings; also powers LM Studio (OpenAI-compatible)
69
+ pip install "dynamanic-raggedy[ollama]" # no extra deps — ollama uses base httpx
70
+ pip install "dynamanic-raggedy[local]" # sentence-transformers + torch (GPU-aware)
71
+ pip install "dynamanic-raggedy[pdf]" # PDF extractor (pypdf)
72
+ pip install "dynamanic-raggedy[docx]" # DOCX extractor (python-docx)
73
+ pip install "dynamanic-raggedy[s3]" # S3 source (boto3)
74
+ pip install "dynamanic-raggedy[sqlitevec]" # SQLite-vec store
75
+ pip install "dynamanic-raggedy[pgvector]" # pgvector store
76
+ pip install "dynamanic-raggedy[qdrant]" # Qdrant store
77
+ pip install "dynamanic-raggedy[chroma]" # Chroma store
78
+ pip install "dynamanic-raggedy[weaviate]" # Weaviate store
79
+ pip install "dynamanic-raggedy[all]" # everything
80
+ ```
81
+
82
+ ## License
83
+
84
+ MIT
@@ -0,0 +1,61 @@
1
+ # Adapters
2
+
3
+ Every adapter is loaded lazily by name through `RaggedyConfig`. Missing extras surface as `ConfigError` with the exact `pip install` command — never a raw `ImportError`.
4
+
5
+ ## Vector stores
6
+
7
+ | `store=` | Module | Extra | URL form | Notes |
8
+ |----------|--------|-------|----------|-------|
9
+ | `memory` | `raggedy.stores.memory.InMemoryStore` | — (numpy only) | n/a | Default. Dict + numpy cosine. |
10
+ | `sqlite-vec` | `raggedy.stores.sqlite_vec.SqliteVecStore` | `[sqlitevec]` | `:memory:` / file path / `sqlite:///…` | Local persistent. vec0 with `distance_metric=cosine`. |
11
+ | `pgvector` | `raggedy.stores.pgvector.PgVectorStore` | `[pgvector]` | `postgresql://…` | JSONB tags + GIN index. |
12
+ | `qdrant` | `raggedy.stores.qdrant.QdrantStore` | `[qdrant]` | `:memory:` / file path / `http(s)://…` | UUIDv5 from chunk id; one collection per Raggedy instance. |
13
+ | `chroma` | `raggedy.stores.chroma.ChromaStore` | `[chroma]` | `:memory:` / file path / `http(s)://…` | `embedding_function=None`; tags flattened to `tag_*` keys. |
14
+ | `weaviate` | `raggedy.stores.weaviate.WeaviateStore` | `[weaviate]` | `http(s)://…` | v4 async client; PascalCase collection. |
15
+
16
+ Shared behaviour:
17
+
18
+ - Cosine similarity throughout. Score = `1 - cosine_distance`.
19
+ - The schema is created lazily on the first `upsert`, once the embedding dimension is known. Subsequent upserts with a different dimension raise `ProviderError`.
20
+ - Every adapter exposes `aclose()` to release pooled connections.
21
+
22
+ ## LLM providers
23
+
24
+ | `llm=` | Module | Extra | Notes |
25
+ |--------|--------|-------|-------|
26
+ | `anthropic` | `raggedy.llm.anthropic.AnthropicProvider` | `[anthropic]` | Claude 4.x family. Reads `ANTHROPIC_API_KEY`. |
27
+ | `openai` | `raggedy.llm.openai.OpenAIProvider` | `[openai]` | tiktoken-aware `count_tokens` when installed. |
28
+ | `ollama` | `raggedy.llm.ollama.OllamaProvider` | — (base httpx) | Local. `estimate_cost` always returns 0. |
29
+ | `lm_studio` / `lmstudio` | `raggedy.llm.openai.OpenAIProvider` | `[openai]` | OpenAI-compatible endpoint; reads `LMSTUDIO_BASE_URL`, `LMSTUDIO_API_KEY`. |
30
+
31
+ ## Embedding backends
32
+
33
+ | `embedding=` | Module | Extra | Notes |
34
+ |--------------|--------|-------|-------|
35
+ | `deterministic` | `raggedy.embeddings.deterministic.DeterministicEmbedder` | — | Hashed bag-of-words. Real algorithm, no semantics — good for tests/CI. |
36
+ | `sentence-transformers` / `local` | `raggedy.embeddings.local.SentenceTransformersEmbedder` | `[local]` | GPU-aware. `device="auto"` picks CUDA → MPS → CPU. |
37
+ | `openai` | `raggedy.embeddings.openai.OpenAIEmbedder` | `[openai]` | `text-embedding-3-small` default; pass `dimensions=` to truncate. |
38
+ | `ollama` | `raggedy.embeddings.ollama.OllamaEmbedder` | — (base httpx) | Uses `/api/embed` (batch); falls back to `/api/embeddings`. |
39
+ | `lm_studio` / `lmstudio` | `raggedy.embeddings.openai.OpenAIEmbedder` | `[openai]` | Reuses the OpenAI client pointed at LM Studio. |
40
+
41
+ ## Chunkers
42
+
43
+ | `chunker_strategy=` | Module | Notes |
44
+ |---------------------|--------|-------|
45
+ | `semantic` | `raggedy.chunking.semantic.SemanticChunker` | Default. Paragraph + sentence boundaries, greedy merge, leading overlap. |
46
+ | `paragraph` | `raggedy.chunking.paragraph.ParagraphChunker` | One chunk per paragraph (no merging). |
47
+ | `sentence` | `raggedy.chunking.sentence.SentenceChunker` | Sentence-level greedy merge to `target_tokens`. |
48
+ | `fixed` | `raggedy.chunking.fixed.FixedChunker` | Fixed window with overlap; word-boundary snap. |
49
+
50
+ For a custom chunker, write any object satisfying the `Chunker` protocol and pass `chunker=` to `Raggedy(...)`. See [example 04](https://github.com/Dynamanic/raggedy/blob/main/examples/04_custom_chunker.py).
51
+
52
+ ## Persistence
53
+
54
+ | `recorder=` / `cost_backend=` | Module | Extra |
55
+ |---|---|---|
56
+ | `recorder=memory` | `raggedy.recording.memory.InMemoryRunRecorder` | — |
57
+ | `recorder=sqlite` | `raggedy.recording.sqlite.SqliteRunRecorder` | `[sqlitevec]` |
58
+ | `cost_backend=memory` | `raggedy.cost.tracker.InMemoryCostTracker` | — |
59
+ | `cost_backend=sqlite` | `raggedy.cost.sqlite.SqliteCostTracker` | `[sqlitevec]` |
60
+
61
+ The `SqliteCostTracker` uses an atomic `INSERT … ON CONFLICT DO UPDATE` to keep concurrent writers correct without an app-level lock.