elfmem 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- elfmem-0.1.0/.github/workflows/ci.yml +39 -0
- elfmem-0.1.0/.github/workflows/publish.yml +60 -0
- elfmem-0.1.0/.gitignore +10 -0
- elfmem-0.1.0/.python-version +1 -0
- elfmem-0.1.0/PKG-INFO +410 -0
- elfmem-0.1.0/QUICKSTART.md +229 -0
- elfmem-0.1.0/README.md +374 -0
- elfmem-0.1.0/SIMULATION_OVERVIEW.md +407 -0
- elfmem-0.1.0/START_HERE.md +236 -0
- elfmem-0.1.0/alembic/env.py +63 -0
- elfmem-0.1.0/alembic/script.py.mako +27 -0
- elfmem-0.1.0/alembic.ini +41 -0
- elfmem-0.1.0/docs/amgs_architecture.md +749 -0
- elfmem-0.1.0/docs/amgs_instructions.md +227 -0
- elfmem-0.1.0/docs/coding_principles.md +364 -0
- elfmem-0.1.0/docs/notes.md +275 -0
- elfmem-0.1.0/docs/plans/step_03_schema_storage.md +767 -0
- elfmem-0.1.0/docs/plans/step_04_mock_adapters.md +346 -0
- elfmem-0.1.0/docs/plans/step_05_learn_consolidate.md +682 -0
- elfmem-0.1.0/docs/plans/step_06_recall_frame.md +785 -0
- elfmem-0.1.0/docs/plans/step_07_curate.md +385 -0
- elfmem-0.1.0/docs/plans/step_08_real_adapters.md +416 -0
- elfmem-0.1.0/docs/plans/step_09_config_factory.md +446 -0
- elfmem-0.1.0/docs/prompt_ab_testing.md +638 -0
- elfmem-0.1.0/docs/prompt_team_01.md +18 -0
- elfmem-0.1.0/docs/testing_principles.md +133 -0
- elfmem-0.1.0/pyproject.toml +68 -0
- elfmem-0.1.0/sim/EXPLORATIONS.md +1146 -0
- elfmem-0.1.0/sim/README.md +278 -0
- elfmem-0.1.0/sim/explorations/001_basic_decay.md +177 -0
- elfmem-0.1.0/sim/explorations/002_confidence_trap.md +213 -0
- elfmem-0.1.0/sim/explorations/003_scoring_walkthrough.md +244 -0
- elfmem-0.1.0/sim/explorations/004_self_interest_model.md +395 -0
- elfmem-0.1.0/sim/explorations/005_decay_sophistication.md +326 -0
- elfmem-0.1.0/sim/explorations/006_self_as_system_prompt.md +751 -0
- elfmem-0.1.0/sim/explorations/007_constitutional_self.md +878 -0
- elfmem-0.1.0/sim/explorations/008_lifecycle_operations.md +895 -0
- elfmem-0.1.0/sim/explorations/009_near_duplicates_and_curate_scheduling.md +568 -0
- elfmem-0.1.0/sim/explorations/010_block_anatomy.md +488 -0
- elfmem-0.1.0/sim/explorations/011_identifying_self_blocks.md +551 -0
- elfmem-0.1.0/sim/explorations/012_self_tag_assignment.md +588 -0
- elfmem-0.1.0/sim/explorations/013_edges.md +643 -0
- elfmem-0.1.0/sim/explorations/014_edge_types.md +369 -0
- elfmem-0.1.0/sim/explorations/015_context_frames_api.md +596 -0
- elfmem-0.1.0/sim/explorations/016_custom_frames.md +428 -0
- elfmem-0.1.0/sim/explorations/017_storage_layer.md +595 -0
- elfmem-0.1.0/sim/explorations/018_duckdb_vs_sqlite.md +347 -0
- elfmem-0.1.0/sim/explorations/019_database_tooling.md +795 -0
- elfmem-0.1.0/sim/explorations/020_graph_layer.md +532 -0
- elfmem-0.1.0/sim/explorations/021_hybrid_retrieval.md +449 -0
- elfmem-0.1.0/sim/explorations/022_layer_model.md +557 -0
- elfmem-0.1.0/sim/explorations/023_agent_usage.md +625 -0
- elfmem-0.1.0/sim/explorations/024_system_refinement.md +954 -0
- elfmem-0.1.0/sim/explorations/025_llm_gateway.md +854 -0
- elfmem-0.1.0/sim/explorations/026_prompt_overrides.md +626 -0
- elfmem-0.1.0/sim/explorations/027_implementation_priority.md +595 -0
- elfmem-0.1.0/sim/explorations/_template.md +47 -0
- elfmem-0.1.0/sim/playgrounds/README.md +110 -0
- elfmem-0.1.0/sim/playgrounds/decay/decay.md +482 -0
- elfmem-0.1.0/sim/playgrounds/frames/frames.md +518 -0
- elfmem-0.1.0/sim/playgrounds/graph/graph.md +428 -0
- elfmem-0.1.0/sim/playgrounds/lifecycle/lifecycle.md +509 -0
- elfmem-0.1.0/sim/playgrounds/retrieval/retrieval.md +391 -0
- elfmem-0.1.0/sim/playgrounds/scoring/scoring.md +528 -0
- elfmem-0.1.0/src/elfmem/__init__.py +5 -0
- elfmem-0.1.0/src/elfmem/adapters/__init__.py +0 -0
- elfmem-0.1.0/src/elfmem/adapters/litellm.py +171 -0
- elfmem-0.1.0/src/elfmem/adapters/mock.py +172 -0
- elfmem-0.1.0/src/elfmem/adapters/models.py +38 -0
- elfmem-0.1.0/src/elfmem/api.py +367 -0
- elfmem-0.1.0/src/elfmem/config.py +187 -0
- elfmem-0.1.0/src/elfmem/context/__init__.py +0 -0
- elfmem-0.1.0/src/elfmem/context/contradiction.py +65 -0
- elfmem-0.1.0/src/elfmem/context/frames.py +123 -0
- elfmem-0.1.0/src/elfmem/context/rendering.py +94 -0
- elfmem-0.1.0/src/elfmem/db/__init__.py +6 -0
- elfmem-0.1.0/src/elfmem/db/engine.py +72 -0
- elfmem-0.1.0/src/elfmem/db/models.py +113 -0
- elfmem-0.1.0/src/elfmem/db/queries.py +630 -0
- elfmem-0.1.0/src/elfmem/memory/__init__.py +0 -0
- elfmem-0.1.0/src/elfmem/memory/blocks.py +42 -0
- elfmem-0.1.0/src/elfmem/memory/dedup.py +66 -0
- elfmem-0.1.0/src/elfmem/memory/graph.py +85 -0
- elfmem-0.1.0/src/elfmem/memory/retrieval.py +212 -0
- elfmem-0.1.0/src/elfmem/operations/__init__.py +0 -0
- elfmem-0.1.0/src/elfmem/operations/consolidate.py +190 -0
- elfmem-0.1.0/src/elfmem/operations/curate.py +144 -0
- elfmem-0.1.0/src/elfmem/operations/learn.py +53 -0
- elfmem-0.1.0/src/elfmem/operations/recall.py +139 -0
- elfmem-0.1.0/src/elfmem/ports/__init__.py +0 -0
- elfmem-0.1.0/src/elfmem/ports/services.py +39 -0
- elfmem-0.1.0/src/elfmem/prompts.py +79 -0
- elfmem-0.1.0/src/elfmem/py.typed +0 -0
- elfmem-0.1.0/src/elfmem/scoring.py +145 -0
- elfmem-0.1.0/src/elfmem/session.py +87 -0
- elfmem-0.1.0/src/elfmem/types.py +93 -0
- elfmem-0.1.0/tests/__init__.py +0 -0
- elfmem-0.1.0/tests/conftest.py +44 -0
- elfmem-0.1.0/tests/test_curate.py +470 -0
- elfmem-0.1.0/tests/test_lifecycle.py +505 -0
- elfmem-0.1.0/tests/test_mock_adapters.py +417 -0
- elfmem-0.1.0/tests/test_retrieval.py +471 -0
- elfmem-0.1.0/tests/test_scoring.py +256 -0
- elfmem-0.1.0/tests/test_storage.py +621 -0
- elfmem-0.1.0/uv.lock +2299 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
name: Test (Python ${{ matrix.python-version }})
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
strategy:
|
|
14
|
+
fail-fast: false
|
|
15
|
+
matrix:
|
|
16
|
+
python-version: ["3.11", "3.12"]
|
|
17
|
+
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v4
|
|
20
|
+
|
|
21
|
+
- name: Install uv
|
|
22
|
+
uses: astral-sh/setup-uv@v5
|
|
23
|
+
with:
|
|
24
|
+
enable-cache: true
|
|
25
|
+
|
|
26
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
27
|
+
run: uv python install ${{ matrix.python-version }}
|
|
28
|
+
|
|
29
|
+
- name: Install dependencies
|
|
30
|
+
run: uv sync --extra dev
|
|
31
|
+
|
|
32
|
+
- name: Lint (ruff)
|
|
33
|
+
run: uv run ruff check src/ tests/
|
|
34
|
+
|
|
35
|
+
- name: Type-check (mypy)
|
|
36
|
+
run: uv run mypy --ignore-missing-imports src/elfmem/
|
|
37
|
+
|
|
38
|
+
- name: Test (pytest)
|
|
39
|
+
run: uv run pytest -q
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*.*.*"
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: read
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
build:
|
|
13
|
+
name: Build distribution
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Install uv
|
|
20
|
+
uses: astral-sh/setup-uv@v5
|
|
21
|
+
with:
|
|
22
|
+
enable-cache: true
|
|
23
|
+
|
|
24
|
+
- name: Set up Python
|
|
25
|
+
run: uv python install 3.11
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies and run tests
|
|
28
|
+
run: |
|
|
29
|
+
uv sync --extra dev
|
|
30
|
+
uv run pytest -q
|
|
31
|
+
|
|
32
|
+
- name: Build package
|
|
33
|
+
run: uv build
|
|
34
|
+
|
|
35
|
+
- name: Upload distribution artifact
|
|
36
|
+
uses: actions/upload-artifact@v4
|
|
37
|
+
with:
|
|
38
|
+
name: dist
|
|
39
|
+
path: dist/
|
|
40
|
+
|
|
41
|
+
publish:
|
|
42
|
+
name: Publish to PyPI
|
|
43
|
+
needs: build
|
|
44
|
+
runs-on: ubuntu-latest
|
|
45
|
+
environment: pypi
|
|
46
|
+
permissions:
|
|
47
|
+
id-token: write # required for OIDC trusted publishing
|
|
48
|
+
|
|
49
|
+
steps:
|
|
50
|
+
- name: Install uv
|
|
51
|
+
uses: astral-sh/setup-uv@v5
|
|
52
|
+
|
|
53
|
+
- name: Download distribution artifact
|
|
54
|
+
uses: actions/download-artifact@v4
|
|
55
|
+
with:
|
|
56
|
+
name: dist
|
|
57
|
+
path: dist/
|
|
58
|
+
|
|
59
|
+
- name: Publish to PyPI
|
|
60
|
+
run: uv publish --trusted-publishing always
|
elfmem-0.1.0/.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
elfmem-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: elfmem
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Self-aware adaptive memory for LLM agents
|
|
5
|
+
Project-URL: Homepage, https://github.com/emson/elfmem
|
|
6
|
+
Project-URL: Repository, https://github.com/emson/elfmem
|
|
7
|
+
Project-URL: Issues, https://github.com/emson/elfmem/issues
|
|
8
|
+
Author: emson
|
|
9
|
+
License: MIT
|
|
10
|
+
Keywords: agents,llm,memory,rag,sqlite
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Classifier: Typing :: Typed
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Requires-Dist: aiosqlite>=0.19
|
|
21
|
+
Requires-Dist: alembic>=1.13
|
|
22
|
+
Requires-Dist: greenlet>=3.3.2
|
|
23
|
+
Requires-Dist: instructor>=1.2
|
|
24
|
+
Requires-Dist: litellm>=1.30
|
|
25
|
+
Requires-Dist: numpy>=1.26
|
|
26
|
+
Requires-Dist: pydantic>=2.0
|
|
27
|
+
Requires-Dist: pyyaml>=6.0
|
|
28
|
+
Requires-Dist: sqlalchemy>=2.0
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: ruff>=0.3; extra == 'dev'
|
|
34
|
+
Requires-Dist: types-pyyaml; extra == 'dev'
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
|
|
37
|
+
# elfmem
|
|
38
|
+
|
|
39
|
+
**Adaptive, self-aware memory for LLM agents.**
|
|
40
|
+
|
|
41
|
+
elfmem gives your LLM agent a memory that grows, evolves, and forgets — just like a human's. Knowledge that gets used survives; knowledge that doesn't fades away. Identity persists across sessions. Context is always relevant.
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
import asyncio
|
|
45
|
+
from elfmem import MemorySystem
|
|
46
|
+
|
|
47
|
+
async def main():
|
|
48
|
+
system = await MemorySystem.from_config("agent.db", {
|
|
49
|
+
"llm": {"model": "claude-sonnet-4-6"},
|
|
50
|
+
"embeddings": {"model": "text-embedding-3-small", "dimensions": 1536},
|
|
51
|
+
})
|
|
52
|
+
|
|
53
|
+
async with system.session():
|
|
54
|
+
# Teach the agent something
|
|
55
|
+
await system.learn("Use Celery with Redis for background tasks in Django.")
|
|
56
|
+
await system.learn("I always explain my reasoning before giving recommendations.")
|
|
57
|
+
|
|
58
|
+
# Retrieve relevant context for a prompt
|
|
59
|
+
identity = await system.frame("self") # Who am I?
|
|
60
|
+
context = await system.frame("attention", # What do I know about this?
|
|
61
|
+
query="background job processing")
|
|
62
|
+
|
|
63
|
+
print(identity.text) # Agent identity, values, style
|
|
64
|
+
print(context.text) # Relevant knowledge, ranked by importance
|
|
65
|
+
|
|
66
|
+
asyncio.run(main())
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Features
|
|
70
|
+
|
|
71
|
+
- **Adaptive decay** — Knowledge survives when reinforced through use, fades when ignored. Session-aware clock means your agent's memory doesn't decay over weekends.
|
|
72
|
+
- **SELF frame** — Persistent agent identity. Values, style, and constraints survive across sessions with near-permanent decay rates.
|
|
73
|
+
- **Hybrid retrieval** — 4-stage pipeline: pre-filter, vector search, graph expansion, composite scoring. Finds knowledge that's relevant *and* important.
|
|
74
|
+
- **Knowledge graph** — Semantic edges between memory blocks. Co-retrieved knowledge strengthens connections. Graph expansion recovers related-but-not-similar context.
|
|
75
|
+
- **Contradiction detection** — LLM-powered detection of conflicting knowledge. Newer, higher-confidence blocks win.
|
|
76
|
+
- **Near-duplicate resolution** — Detects when new knowledge updates existing knowledge. Old block archived, new block inherits history.
|
|
77
|
+
- **Zero infrastructure** — SQLite backend. No Redis, no Postgres, no vector database. One file, fully portable.
|
|
78
|
+
- **Any LLM provider** — LiteLLM backend supports 100+ providers. Switch from OpenAI to Anthropic to local Ollama with a config change.
|
|
79
|
+
|
|
80
|
+
## Installation
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
uv add elfmem
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Or with pip:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
pip install elfmem
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Requires Python 3.11+.
|
|
93
|
+
|
|
94
|
+
## How It Works
|
|
95
|
+
|
|
96
|
+
### The Lifecycle
|
|
97
|
+
|
|
98
|
+
Every piece of knowledge follows the same path:
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
learn() → Instant ingestion. Content-hash dedup. No API calls.
|
|
102
|
+
consolidate() → Batch processing. Embeddings, self-alignment scoring,
|
|
103
|
+
tag inference, near-duplicate detection, graph edges.
|
|
104
|
+
recall() → 4-stage hybrid retrieval. Reinforces returned blocks.
|
|
105
|
+
curate() → Maintenance. Archives decayed blocks, prunes weak edges,
|
|
106
|
+
reinforces top-scoring knowledge.
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Three Frames
|
|
110
|
+
|
|
111
|
+
Frames are pre-configured retrieval pipelines optimized for different contexts:
|
|
112
|
+
|
|
113
|
+
| Frame | Purpose | Scoring Priority | Use Case |
|
|
114
|
+
|-------|---------|-----------------|----------|
|
|
115
|
+
| **SELF** | Agent identity | Confidence, reinforcement, centrality | System prompt injection |
|
|
116
|
+
| **ATTENTION** | Query-relevant knowledge | Similarity, recency | RAG-style retrieval |
|
|
117
|
+
| **TASK** | Goal-oriented context | Balanced across all signals | Task planning |
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
# Identity context — cached, no embedding needed
|
|
121
|
+
self_ctx = await system.frame("self")
|
|
122
|
+
|
|
123
|
+
# Knowledge retrieval — hybrid pipeline with graph expansion
|
|
124
|
+
attn_ctx = await system.frame("attention", query="async error handling")
|
|
125
|
+
|
|
126
|
+
# Task context — balanced scoring, goal blocks guaranteed
|
|
127
|
+
task_ctx = await system.frame("task", query="refactor the API layer")
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Decay Tiers
|
|
131
|
+
|
|
132
|
+
Knowledge decays at different rates based on its nature:
|
|
133
|
+
|
|
134
|
+
| Tier | Half-life | Use Case |
|
|
135
|
+
|------|-----------|----------|
|
|
136
|
+
| Permanent | ~80,000 hours | Constitutional beliefs, core identity |
|
|
137
|
+
| Durable | ~693 hours | Stable preferences, learned values |
|
|
138
|
+
| Standard | ~69 hours | General knowledge |
|
|
139
|
+
| Ephemeral | ~14 hours | Session observations, temporary facts |
|
|
140
|
+
|
|
141
|
+
Decay is **session-aware**: the clock only ticks during active use. Your agent's memory doesn't degrade over holidays or downtime.
|
|
142
|
+
|
|
143
|
+
### Composite Scoring
|
|
144
|
+
|
|
145
|
+
Every block is scored across five dimensions:
|
|
146
|
+
|
|
147
|
+
```
|
|
148
|
+
Score = w_similarity * cosine_similarity(query, block)
|
|
149
|
+
+ w_confidence * block.confidence
|
|
150
|
+
+ w_recency * exp(-lambda * hours_since_reinforced)
|
|
151
|
+
+ w_centrality * normalized_weighted_degree(block)
|
|
152
|
+
+ w_reinforcement * log(1 + count) / log(1 + max_count)
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
Each frame uses different weights. SELF emphasizes confidence and reinforcement. ATTENTION emphasizes similarity and recency.
|
|
156
|
+
|
|
157
|
+
## Configuration
|
|
158
|
+
|
|
159
|
+
### Minimal (defaults)
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
system = await MemorySystem.from_config("agent.db")
|
|
163
|
+
# Uses claude-sonnet-4-6 for LLM, text-embedding-3-small for embeddings
|
|
164
|
+
# Requires ANTHROPIC_API_KEY environment variable
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### YAML config file
|
|
168
|
+
|
|
169
|
+
```yaml
|
|
170
|
+
# elfmem.yaml
|
|
171
|
+
llm:
|
|
172
|
+
model: "claude-sonnet-4-6"
|
|
173
|
+
contradiction_model: "claude-opus-4-6" # higher precision for contradictions
|
|
174
|
+
|
|
175
|
+
embeddings:
|
|
176
|
+
model: "text-embedding-3-small"
|
|
177
|
+
dimensions: 1536
|
|
178
|
+
|
|
179
|
+
memory:
|
|
180
|
+
inbox_threshold: 10
|
|
181
|
+
curate_interval_hours: 40
|
|
182
|
+
self_alignment_threshold: 0.70
|
|
183
|
+
prune_threshold: 0.05
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
system = await MemorySystem.from_config("agent.db", "elfmem.yaml")
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### Local models (no API key)
|
|
191
|
+
|
|
192
|
+
```yaml
|
|
193
|
+
llm:
|
|
194
|
+
model: "ollama/llama3.2"
|
|
195
|
+
base_url: "http://localhost:11434"
|
|
196
|
+
|
|
197
|
+
embeddings:
|
|
198
|
+
model: "ollama/nomic-embed-text"
|
|
199
|
+
dimensions: 768
|
|
200
|
+
base_url: "http://localhost:11434"
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### Environment variables
|
|
204
|
+
|
|
205
|
+
```bash
|
|
206
|
+
export ANTHROPIC_API_KEY=sk-ant-...
|
|
207
|
+
# or
|
|
208
|
+
export OPENAI_API_KEY=sk-...
|
|
209
|
+
# or any provider LiteLLM supports
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
API keys are read by LiteLLM from standard environment variables. They never appear in config files.
|
|
213
|
+
|
|
214
|
+
## Agent Integration Pattern
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
async def run_turn(system, user_message):
|
|
218
|
+
# 1. Assemble context
|
|
219
|
+
self_ctx = await system.frame("self")
|
|
220
|
+
attn_ctx = await system.frame("attention", query=user_message)
|
|
221
|
+
|
|
222
|
+
# 2. Build prompt with memory context
|
|
223
|
+
prompt = f"""
|
|
224
|
+
{self_ctx.text}
|
|
225
|
+
|
|
226
|
+
{attn_ctx.text}
|
|
227
|
+
|
|
228
|
+
User: {user_message}
|
|
229
|
+
"""
|
|
230
|
+
|
|
231
|
+
# 3. Generate response
|
|
232
|
+
response = await llm.complete(prompt)
|
|
233
|
+
|
|
234
|
+
# 4. Learn from the interaction
|
|
235
|
+
if worth_remembering(response):
|
|
236
|
+
await system.learn(extract_knowledge(response))
|
|
237
|
+
|
|
238
|
+
return response
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## API Reference
|
|
242
|
+
|
|
243
|
+
### MemorySystem
|
|
244
|
+
|
|
245
|
+
```python
|
|
246
|
+
# Factory
|
|
247
|
+
system = await MemorySystem.from_config(db_path, config=None)
|
|
248
|
+
|
|
249
|
+
# Session management (required)
|
|
250
|
+
async with system.session():
|
|
251
|
+
...
|
|
252
|
+
|
|
253
|
+
# Write
|
|
254
|
+
result = await system.learn(content, tags=None, category="knowledge")
|
|
255
|
+
|
|
256
|
+
# Read
|
|
257
|
+
frame_result = await system.frame(name, query=None, top_k=5)
|
|
258
|
+
blocks = await system.recall(name, query=None, top_k=5) # raw, no side effects
|
|
259
|
+
|
|
260
|
+
# Maintenance (usually automatic)
|
|
261
|
+
await system.consolidate() # process inbox → active
|
|
262
|
+
await system.curate() # archive decayed, prune edges, reinforce top-N
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
### Return Types
|
|
266
|
+
|
|
267
|
+
```python
|
|
268
|
+
LearnResult(block_id, status) # "created" | "duplicate_rejected"
|
|
269
|
+
FrameResult(text, blocks, frame_name) # rendered text + scored blocks
|
|
270
|
+
ConsolidateResult(processed, promoted, deduplicated, edges_created)
|
|
271
|
+
CurateResult(archived, edges_pruned, reinforced)
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
### Custom Prompts
|
|
275
|
+
|
|
276
|
+
Override the LLM prompts for domain-specific agents:
|
|
277
|
+
|
|
278
|
+
```yaml
|
|
279
|
+
prompts:
|
|
280
|
+
self_alignment: |
|
|
281
|
+
You are evaluating a memory block for a medical AI assistant...
|
|
282
|
+
{self_context}
|
|
283
|
+
{block}
|
|
284
|
+
Respond: {"score": <float>}
|
|
285
|
+
|
|
286
|
+
valid_self_tags:
|
|
287
|
+
- "self/constitutional"
|
|
288
|
+
- "self/domain/oncology"
|
|
289
|
+
- "self/regulatory/hipaa"
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
### Custom Adapters
|
|
293
|
+
|
|
294
|
+
For full control, implement the port protocols directly:
|
|
295
|
+
|
|
296
|
+
```python
|
|
297
|
+
from elfmem.ports.services import LLMService, EmbeddingService
|
|
298
|
+
|
|
299
|
+
class MyLLMService:
|
|
300
|
+
async def score_self_alignment(self, block: str, self_context: str) -> float: ...
|
|
301
|
+
async def infer_self_tags(self, block: str, self_context: str) -> list[str]: ...
|
|
302
|
+
async def detect_contradiction(self, block_a: str, block_b: str) -> float: ...
|
|
303
|
+
|
|
304
|
+
system = MemorySystem(engine, llm_service=MyLLMService(), embedding_service=MyEmbedder())
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
## Architecture
|
|
308
|
+
|
|
309
|
+
```
|
|
310
|
+
src/elfmem/
|
|
311
|
+
├── api.py # MemorySystem — public API
|
|
312
|
+
├── config.py # ElfmemConfig — Pydantic configuration
|
|
313
|
+
├── scoring.py # Composite scoring formula (frozen)
|
|
314
|
+
├── types.py # Domain types — shared vocabulary
|
|
315
|
+
├── prompts.py # LLM prompt templates
|
|
316
|
+
├── session.py # Session lifecycle, active hours tracking
|
|
317
|
+
├── ports/
|
|
318
|
+
│ └── services.py # LLMService + EmbeddingService protocols
|
|
319
|
+
├── adapters/
|
|
320
|
+
│ ├── mock.py # Deterministic mocks for testing
|
|
321
|
+
│ ├── litellm.py # Real adapters (LiteLLM + instructor)
|
|
322
|
+
│ └── models.py # Pydantic response models
|
|
323
|
+
├── db/
|
|
324
|
+
│ ├── models.py # SQLAlchemy Core table definitions
|
|
325
|
+
│ ├── engine.py # Async engine factory
|
|
326
|
+
│ └── queries.py # All database operations
|
|
327
|
+
├── memory/
|
|
328
|
+
│ ├── blocks.py # Block state, content hashing, decay tiers
|
|
329
|
+
│ ├── dedup.py # Near-duplicate detection and resolution
|
|
330
|
+
│ ├── graph.py # Centrality, expansion, edge reinforcement
|
|
331
|
+
│ └── retrieval.py # 4-stage hybrid retrieval pipeline
|
|
332
|
+
├── context/
|
|
333
|
+
│ ├── frames.py # Frame definitions, registry, cache
|
|
334
|
+
│ ├── rendering.py # Blocks → rendered text
|
|
335
|
+
│ └── contradiction.py # Contradiction suppression
|
|
336
|
+
└── operations/
|
|
337
|
+
├── learn.py # learn() — fast-path ingestion
|
|
338
|
+
├── consolidate.py # consolidate() — batch promotion
|
|
339
|
+
├── recall.py # recall() — retrieval + reinforcement
|
|
340
|
+
└── curate.py # curate() — maintenance
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
**Four layers, clear boundaries:**
|
|
344
|
+
|
|
345
|
+
| Layer | Responsibility | Side Effects |
|
|
346
|
+
|-------|---------------|-------------|
|
|
347
|
+
| **Storage** (db/) | Tables, queries, engine | Database writes |
|
|
348
|
+
| **Memory** (memory/) | Blocks, dedup, graph, retrieval | None (pure) |
|
|
349
|
+
| **Context** (context/) | Frames, rendering, contradictions | None (pure) |
|
|
350
|
+
| **Operations** (operations/) | Orchestration, lifecycle | All side effects |
|
|
351
|
+
|
|
352
|
+
## Development
|
|
353
|
+
|
|
354
|
+
```bash
|
|
355
|
+
# Clone
|
|
356
|
+
git clone https://github.com/emson/elfmem.git
|
|
357
|
+
cd elfmem
|
|
358
|
+
|
|
359
|
+
# Install with dev dependencies
|
|
360
|
+
uv sync --extra dev
|
|
361
|
+
|
|
362
|
+
# Run tests (no API key needed — uses deterministic mocks)
|
|
363
|
+
uv run pytest
|
|
364
|
+
|
|
365
|
+
# Type checking
|
|
366
|
+
uv run mypy --ignore-missing-imports src/elfmem/
|
|
367
|
+
|
|
368
|
+
# Lint
|
|
369
|
+
uv run ruff check src/ tests/
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
### Testing Philosophy
|
|
373
|
+
|
|
374
|
+
All tests run against deterministic mock services. No API keys, no network calls, fully reproducible. The mock embedding service produces hash-seeded vectors — same input always gives the same embedding. The mock LLM service returns configurable scores and tags via substring matching.
|
|
375
|
+
|
|
376
|
+
```python
|
|
377
|
+
from elfmem.adapters.mock import make_mock_llm, make_mock_embedding
|
|
378
|
+
|
|
379
|
+
# Control exactly what the LLM returns
|
|
380
|
+
llm = make_mock_llm(
|
|
381
|
+
alignment_overrides={"identity": 0.95},
|
|
382
|
+
tag_overrides={"identity": ["self/value"]},
|
|
383
|
+
)
|
|
384
|
+
|
|
385
|
+
# Control similarity between specific texts
|
|
386
|
+
embedding = make_mock_embedding(
|
|
387
|
+
similarity_overrides={
|
|
388
|
+
frozenset({"cats are great", "dogs are great"}): 0.85,
|
|
389
|
+
},
|
|
390
|
+
)
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
## Design Decisions
|
|
394
|
+
|
|
395
|
+
| Decision | Rationale |
|
|
396
|
+
|----------|-----------|
|
|
397
|
+
| SQLAlchemy Core, not ORM | Bulk updates, embedding BLOBs, N+1 centrality queries |
|
|
398
|
+
| Session-aware decay, not wall-clock | Knowledge survives holidays and downtime |
|
|
399
|
+
| Soft bias for identity, not hard gates | Everything is learned; self-aligned knowledge just survives longer |
|
|
400
|
+
| Retrieval is pure; reinforcement is separate | Clean separation of read path and side effects |
|
|
401
|
+
| LiteLLM as unified backend | One adapter for 100+ providers; switch with config |
|
|
402
|
+
| Mock-first testing | All logic verified without API keys; adapters are thin wrappers |
|
|
403
|
+
|
|
404
|
+
## License
|
|
405
|
+
|
|
406
|
+
MIT
|
|
407
|
+
|
|
408
|
+
## Acknowledgements
|
|
409
|
+
|
|
410
|
+
elfmem was designed through 27 structured explorations and 6 subsystem playgrounds, building mathematical confidence in every architectural decision before writing code. The complete design documentation is in `sim/explorations/`.
|