amsdal_ml 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. amsdal_ml-0.1.4/CLAUDE.md +171 -0
  2. amsdal_ml-0.1.4/PKG-INFO +235 -0
  3. amsdal_ml-0.1.4/README.md +218 -0
  4. amsdal_ml-0.1.4/RELEASE.md +180 -0
  5. amsdal_ml-0.1.4/amsdal_ml/__about__.py +1 -0
  6. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/agents/retriever_tool.py +12 -2
  7. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/mcp_client/stdio_client.py +2 -2
  8. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/change-logs.md +10 -0
  9. amsdal_ml-0.1.4/latest-changelogs.md +9 -0
  10. amsdal_ml-0.1.3/PKG-INFO +0 -69
  11. amsdal_ml-0.1.3/README.md +0 -52
  12. amsdal_ml-0.1.3/amsdal_ml/__about__.py +0 -1
  13. amsdal_ml-0.1.3/latest-changelogs.md +0 -6
  14. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/.amsdal/.dependencies +0 -0
  15. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/.amsdal/.environment +0 -0
  16. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/.amsdal/.secrets +0 -0
  17. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/.amsdal-cli +0 -0
  18. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/.github/workflows/ci.yml +0 -0
  19. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/.github/workflows/release.yml +0 -0
  20. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/.github/workflows/tag_check.yml +0 -0
  21. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/.gitignore +0 -0
  22. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/Third-Party Materials - AMSDAL Dependencies - License Notices.md +0 -0
  23. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/__init__.py +0 -0
  24. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/agents/__init__.py +0 -0
  25. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/agents/agent.py +0 -0
  26. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/agents/default_qa_agent.py +0 -0
  27. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/agents/promts/__init__.py +0 -0
  28. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/agents/promts/react_chat.prompt +0 -0
  29. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/app.py +0 -0
  30. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/fileio/__init__.py +0 -0
  31. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/fileio/base_loader.py +0 -0
  32. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/fileio/openai_loader.py +0 -0
  33. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/mcp_client/__init__.py +0 -0
  34. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/mcp_client/base.py +0 -0
  35. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/mcp_client/http_client.py +0 -0
  36. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/mcp_server/__init__.py +0 -0
  37. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/mcp_server/server_retriever_stdio.py +0 -0
  38. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/migrations/0000_initial.py +0 -0
  39. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/ml_config.py +0 -0
  40. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/ml_ingesting/__init__.py +0 -0
  41. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/ml_ingesting/default_ingesting.py +0 -0
  42. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/ml_ingesting/embedding_data.py +0 -0
  43. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/ml_ingesting/ingesting.py +0 -0
  44. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/ml_ingesting/openai_ingesting.py +0 -0
  45. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/ml_models/__init__.py +0 -0
  46. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/ml_models/models.py +0 -0
  47. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/ml_models/openai_model.py +0 -0
  48. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/ml_retrievers/__init__.py +0 -0
  49. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/ml_retrievers/default_retriever.py +0 -0
  50. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/ml_retrievers/openai_retriever.py +0 -0
  51. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/ml_retrievers/retriever.py +0 -0
  52. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/models/__init__.py +0 -0
  53. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/models/embedding_model.py +0 -0
  54. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/amsdal_ml/py.typed +0 -0
  55. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/config.yml +0 -0
  56. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/license_check.py +0 -0
  57. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/pyproject.toml +0 -0
  58. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/scripts/release.sh +0 -0
  59. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/scripts/tag_check.sh +0 -0
  60. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/tests/__init__.py +0 -0
  61. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/tests/agents_tests/__init__.py +0 -0
  62. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/tests/agents_tests/test_arun.py +0 -0
  63. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/tests/agents_tests/test_astream.py +0 -0
  64. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/tests/agents_tests/test_astream_final_only.py +0 -0
  65. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/tests/agents_tests/test_fakes.py +0 -0
  66. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/tests/agents_tests/test_tool_call_arguments_async.py +0 -0
  67. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/tests/conftest.py +0 -0
  68. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/tests/test_openai_model.py +0 -0
  69. {amsdal_ml-0.1.3 → amsdal_ml-0.1.4}/uv.lock +0 -0
@@ -0,0 +1,171 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ amsdal-ml is a machine learning plugin for the AMSDAL Framework that provides embeddings, vector search, and AI-driven features. It supports both synchronous and asynchronous modes, with primary focus on async operations using OpenAI models.
8
+
9
+ ## Development Commands
10
+
11
+ ### Environment Setup
12
+ ```bash
13
+ # Install dependencies using hatch/uv
14
+ pip install --upgrade uv hatch==1.14.2
15
+ hatch env create
16
+ hatch run sync
17
+ ```
18
+
19
+ ### Testing
20
+ ```bash
21
+ # Run all tests with coverage
22
+ hatch run cov
23
+
24
+ # Run specific test file
25
+ hatch run test tests/test_openai_model.py
26
+
27
+ # Run tests with pytest directly (after env setup)
28
+ pytest tests/
29
+ pytest tests/agents_tests/ # Run agent-specific tests
30
+ ```
31
+
32
+ ### Code Quality
33
+ ```bash
34
+ # Run all checks (style + typing)
35
+ hatch run all
36
+
37
+ # Style checks only
38
+ hatch run style
39
+
40
+ # Format code (fix style issues)
41
+ hatch run fmt
42
+
43
+ # Type checking
44
+ hatch run typing
45
+ ```
46
+
47
+ ### Dependency Management
48
+ ```bash
49
+ # Sync dependencies
50
+ hatch run sync
51
+
52
+ # Update lock file
53
+ hatch run lock
54
+
55
+ # Upgrade all dependencies
56
+ hatch run lock-upgrade
57
+ ```
58
+
59
+ ### AMSDAL CLI Commands
60
+ ```bash
61
+ # Generate new model
62
+ amsdal generate model ModelName --format py
63
+
64
+ # Generate property for model
65
+ amsdal generate property --model ModelName property_name
66
+
67
+ # Generate transaction
68
+ amsdal generate transaction TransactionName
69
+
70
+ # Generate hook
71
+ amsdal generate hook --model ModelName on_create
72
+ ```
73
+
74
+ ## Architecture
75
+
76
+ ### Core Components
77
+
78
+ **ML Models** (`amsdal_ml/ml_models/`)
79
+ - Abstract base class `MLModel` defines the interface for all ML models
80
+ - Supports both sync/async invoke and streaming methods
81
+ - Primary implementation uses OpenAI API
82
+ - All models must implement `setup()`, `teardown()`, `invoke()`, `ainvoke()`, `stream()`, and `astream()`
83
+ - Custom error hierarchy: `ModelError`, `ModelConnectionError`, `ModelRateLimitError`, `ModelAPIError`
84
+
85
+ **ML Ingesting** (`amsdal_ml/ml_ingesting/`)
86
+ - `MLIngesting` abstract base handles text generation and embedding creation from data
87
+ - Creates `EmbeddingData` records that link embeddings to source objects
88
+ - Supports chunk-based processing with configurable depth and token limits
89
+ - Both sync/async methods for text generation and embedding
90
+
91
+ **ML Retrievers** (`amsdal_ml/ml_retrievers/`)
92
+ - `MLRetriever` provides semantic search via similarity_search/asimilarity_search
93
+ - Returns `RetrievalChunk` objects with object metadata, chunk text, distance, and tags
94
+ - Supports filtering by include/exclude tags
95
+ - Configurable k parameter for number of results
96
+
97
+ **Agents** (`amsdal_ml/agents/`)
98
+ - Abstract `Agent` base class for Q&A and task-oriented agents
99
+ - Async-first design (sync methods raise NotImplementedError)
100
+ - Returns `AgentOutput` with answer, used_tools, and citations
101
+ - Supports streaming responses via `astream()`
102
+ - File attachments supported through `FileAttachment` interface
103
+
104
+ **MCP Integration**
105
+ - **Server** (`amsdal_ml/mcp_server/`): Exposes retriever search as MCP tool via stdio
106
+ - **Client** (`amsdal_ml/mcp_client/`): Supports both stdio and HTTP transports for calling MCP tools
107
+ - Server accepts base64-encoded AMSDAL config for initialization
108
+
109
+ **File I/O** (`amsdal_ml/fileio/`)
110
+ - `BaseFileLoader` abstract class for uploading files to ML providers
111
+ - `FileAttachment` represents processed attachments (types: PLAIN_TEXT, FILE_ID)
112
+ - `FileItem` helper for creating attachments from paths, bytes, or strings
113
+
114
+ ### Data Models
115
+
116
+ **EmbeddingModel** (`amsdal_ml/models/embedding_model.py`)
117
+ - Core model storing embeddings in database
118
+ - Links to source object via `data_object_class` and `data_object_id`
119
+ - Stores 1536-dimensional vectors (OpenAI text-embedding-3-small default)
120
+ - Includes chunk_index, raw_text, tags, and ml_metadata fields
121
+
122
+ ### Configuration
123
+
124
+ **MLConfig** (`amsdal_ml/ml_config.py`)
125
+ - Loaded from `.env` file using pydantic-settings
126
+ - Key settings:
127
+ - `ml_model_class`: Path to ML model implementation
128
+ - `ml_retriever_class`: Path to retriever implementation
129
+ - `ml_ingesting_class`: Path to ingesting implementation
130
+ - `llm_model_name`: Default 'gpt-4o'
131
+ - `embed_model_name`: Default 'text-embedding-3-small'
132
+ - `embed_max_depth`, `embed_max_chunks`, `embed_max_tokens_per_chunk`: Chunking parameters
133
+ - `retriever_default_k`: Number of results for similarity search
134
+ - `openai_api_key`, `claude_api_key`: API credentials
135
+ - `embedding_targets`: List of models to embed
136
+
137
+ **Database Config** (`config.yml`)
138
+ - Defines AMSDAL connections (sqlite_history, sqlite_state, lock)
139
+ - Resources config maps repository and lakehouse to connections
140
+ - Set `async_mode: true` for async operations
141
+
142
+ ## Code Style
143
+
144
+ - Python 3.11+ required
145
+ - Uses Ruff for linting and formatting with 120-char line length
146
+ - Single quotes enforced (`quote-style = "single"`)
147
+ - Import ordering: force-single-line with order-by-type
148
+ - Type checking via mypy with strict settings (disallow_any_generics, check_untyped_defs)
149
+ - Excludes migrations directory from linting
150
+
151
+ ## Testing
152
+
153
+ - Uses pytest with pytest-asyncio for async tests
154
+ - Test fixtures in `tests/conftest.py` provide mocked OpenAI clients
155
+ - `OPENAI_API_KEY` set to dummy value in tests via fixture
156
+ - Coverage tracking with coverage.py
157
+
158
+ ## CI/CD
159
+
160
+ The project uses self-hosted runners with two jobs:
161
+ 1. **license-check**: Validates third-party licenses using `license_check.py`
162
+ 2. **test-lint**: Runs on Python 3.11 and 3.12, executes `hatch run all` (style+typing) and `hatch run cov`
163
+
164
+ ## Key Patterns
165
+
166
+ 1. **Async-First**: Most components prioritize async methods; sync methods often raise NotImplementedError
167
+ 2. **Abstract Base Classes**: Heavy use of ABCs to define interfaces for models, retrievers, ingesters, and agents
168
+ 3. **Configuration via Pydantic**: Settings loaded from environment with type validation
169
+ 4. **AMSDAL Integration**: Uses AMSDAL's model system, manager, and connection framework
170
+ 5. **Chunking Strategy**: Text split into chunks with metadata preservation for better embedding quality
171
+ 6. **Tag-Based Filtering**: Embeddings tagged for fine-grained retrieval control
@@ -0,0 +1,235 @@
1
+ Metadata-Version: 2.4
2
+ Name: amsdal_ml
3
+ Version: 0.1.4
4
+ Summary: amsdal_ml plugin for AMSDAL Framework
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: aiohttp==3.12.15
7
+ Requires-Dist: amsdal-cli>=0.5.7
8
+ Requires-Dist: amsdal-data>=0.5.9
9
+ Requires-Dist: amsdal-models>=0.5.9
10
+ Requires-Dist: amsdal-utils>=0.5.4
11
+ Requires-Dist: amsdal>=0.5.6
12
+ Requires-Dist: mcp>=0.1
13
+ Requires-Dist: openai==1.100.2
14
+ Requires-Dist: pydantic-settings==2.10.1
15
+ Requires-Dist: pydantic==2.11.7
16
+ Description-Content-Type: text/markdown
17
+
18
+ # AMSDAL ML
19
+
20
+ [![CI](https://github.com/amsdal/amsdal_ml/actions/workflows/ci.yml/badge.svg)](https://github.com/amsdal/amsdal_ml/actions/workflows/ci.yml)
21
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
22
+
23
+ Machine learning plugin for the AMSDAL Framework, providing embeddings, vector search, semantic retrieval, and AI agents with support for OpenAI models.
24
+
25
+ ## Features
26
+
27
+ - **Vector Embeddings**: Generate and store embeddings for any AMSDAL model with automatic chunking
28
+ - **Semantic Search**: Query your data using natural language with tag-based filtering
29
+ - **AI Agents**: Build Q&A systems with streaming support and citation tracking
30
+ - **Async-First**: Optimized for high-performance async operations
31
+ - **MCP Integration**: Expose and consume tools via Model Context Protocol (stdio/HTTP)
32
+ - **File Attachments**: Process and embed documents with built-in loaders
33
+ - **Extensible**: Abstract base classes for custom models, retrievers, and ingesters
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install amsdal-ml
39
+ ```
40
+
41
+ ### Requirements
42
+
43
+ - Python 3.11 or higher
44
+ - AMSDAL Framework 0.5.6+
45
+ - OpenAI API key (for default implementations)
46
+
47
+ ## Quick Start
48
+
49
+ ### 1. Configuration
50
+
51
+ Create a `.env` file in your project root:
52
+
53
+ ```env
54
+ OPENAI_API_KEY=sk-your-api-key-here
55
+ async_mode=true
56
+ ml_model_class=amsdal_ml.ml_models.openai_model.OpenAIModel
57
+ ml_retriever_class=amsdal_ml.ml_retrievers.openai_retriever.OpenAIRetriever
58
+ ml_ingesting_class=amsdal_ml.ml_ingesting.openai_ingesting.OpenAIIngesting
59
+ ```
60
+
61
+ Create a `config.yml` for AMSDAL connections:
62
+
63
+ ```yaml
64
+ application_name: my-ml-app
65
+ async_mode: true
66
+ connections:
67
+ - name: sqlite_state
68
+ backend: sqlite-state-async
69
+ credentials:
70
+ - db_path: ./warehouse/state.sqlite3
71
+ - check_same_thread: false
72
+ - name: lock
73
+ backend: amsdal_data.lock.implementations.thread_lock.ThreadLock
74
+ resources_config:
75
+ repository:
76
+ default: sqlite_state
77
+ lock: lock
78
+ ```
79
+
80
+ ### 2. Generate Embeddings
81
+
82
+ ```python
83
+ from amsdal_ml.ml_ingesting.openai_ingesting import OpenAIIngesting
84
+ from amsdal_ml.ml_config import ml_config
85
+
86
+ # Initialize ingesting
87
+ ingester = OpenAIIngesting(
88
+ model=MyModel,
89
+ embedding_field='embedding',
90
+ )
91
+
92
+ # Generate embeddings for an instance
93
+ instance = MyModel(content='Your text here')
94
+ embeddings = await ingester.agenerate_embeddings(instance)
95
+ await ingester.asave(embeddings, instance)
96
+ ```
97
+
98
+ ### 3. Semantic Search
99
+
100
+ ```python
101
+ from amsdal_ml.ml_retrievers.openai_retriever import OpenAIRetriever
102
+
103
+ retriever = OpenAIRetriever()
104
+
105
+ # Search for relevant content
106
+ results = await retriever.asimilarity_search(
107
+ query='What is machine learning?',
108
+ k=5,
109
+ include_tags=['documentation']
110
+ )
111
+
112
+ for chunk in results:
113
+ print(f'{chunk.object_class}:{chunk.object_id} - {chunk.raw_text}')
114
+ ```
115
+
116
+ ### 4. Build an AI Agent
117
+
118
+ ```python
119
+ from amsdal_ml.agents.default_qa_agent import DefaultQAAgent
120
+
121
+ agent = DefaultQAAgent()
122
+
123
+ # Ask questions
124
+ output = await agent.arun('Explain vector embeddings')
125
+ print(output.answer)
126
+ print(f'Used tools: {output.used_tools}')
127
+
128
+ # Stream responses
129
+ async for chunk in agent.astream('What is semantic search?'):
130
+ print(chunk, end='', flush=True)
131
+ ```
132
+
133
+ ## Architecture
134
+
135
+ ### Core Components
136
+
137
+ - **`MLModel`**: Abstract interface for LLM inference (invoke, stream, with attachments)
138
+ - **`MLIngesting`**: Generate text and embeddings from data objects with chunking
139
+ - **`MLRetriever`**: Semantic similarity search with tag-based filtering
140
+ - **`Agent`**: Q&A and task-oriented agents with streaming and citations
141
+ - **`EmbeddingModel`**: Database model storing 1536-dimensional vectors linked to source objects
142
+ - **MCP Server/Client**: Expose retrievers as tools or consume external MCP services
143
+
144
+ ### Configuration
145
+
146
+ All settings are managed via `MLConfig` in `.env`:
147
+
148
+ ```env
149
+ # Model Configuration
150
+ llm_model_name=gpt-4o
151
+ llm_temperature=0.0
152
+ embed_model_name=text-embedding-3-small
153
+
154
+ # Chunking Parameters
155
+ embed_max_depth=2
156
+ embed_max_chunks=10
157
+ embed_max_tokens_per_chunk=800
158
+
159
+ # Retrieval Settings
160
+ retriever_default_k=8
161
+ ```
162
+
163
+ ## Development
164
+
165
+ ### Setup
166
+
167
+ ```bash
168
+ # Install dependencies
169
+ pip install --upgrade uv hatch==1.14.2
170
+ hatch env create
171
+ hatch run sync
172
+ ```
173
+
174
+ ### Testing
175
+
176
+ ```bash
177
+ # Run all tests with coverage
178
+ hatch run cov
179
+
180
+ # Run specific tests
181
+ hatch run test tests/test_openai_model.py
182
+
183
+ # Run tests directly with pytest (verbose output)
184
+ pytest tests/ -v
185
+ ```
186
+
187
+ ### Code Quality
188
+
189
+ ```bash
190
+ # Run all checks (style + typing)
191
+ hatch run all
192
+
193
+ # Format code
194
+ hatch run fmt
195
+
196
+ # Type checking
197
+ hatch run typing
198
+ ```
199
+
200
+ ### AMSDAL CLI
201
+
202
+ ```bash
203
+ # Generate a new model
204
+ amsdal generate model MyModel --format py
205
+
206
+ # Generate property
207
+ amsdal generate property --model MyModel embedding_field
208
+
209
+ # Generate transaction
210
+ amsdal generate transaction ProcessEmbeddings
211
+
212
+ # Generate hook
213
+ amsdal generate hook --model MyModel on_create
214
+ ```
215
+
216
+ ## MCP Server
217
+
218
+ Run the retriever as an MCP server for integration with Claude Desktop or other MCP clients:
219
+
220
+ ```bash
221
+ python -m amsdal_ml.mcp_server.server_retriever_stdio \
222
+ --amsdal-config "$(echo '{"async_mode": true, ...}' | base64)"
223
+ ```
224
+
225
+ The server exposes a `search` tool for semantic search in your knowledge base.
226
+
227
+ ## License
228
+
229
+ See `amsdal_ml/Third-Party Materials - AMSDAL Dependencies - License Notices.md` for dependency licenses.
230
+
231
+ ## Links
232
+
233
+ - [AMSDAL Framework](https://github.com/amsdal/amsdal)
234
+ - [Documentation](https://docs.amsdal.com)
235
+ - [Issue Tracker](https://github.com/amsdal/amsdal_ml/issues)
@@ -0,0 +1,218 @@
1
+ # AMSDAL ML
2
+
3
+ [![CI](https://github.com/amsdal/amsdal_ml/actions/workflows/ci.yml/badge.svg)](https://github.com/amsdal/amsdal_ml/actions/workflows/ci.yml)
4
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
5
+
6
+ Machine learning plugin for the AMSDAL Framework, providing embeddings, vector search, semantic retrieval, and AI agents with support for OpenAI models.
7
+
8
+ ## Features
9
+
10
+ - **Vector Embeddings**: Generate and store embeddings for any AMSDAL model with automatic chunking
11
+ - **Semantic Search**: Query your data using natural language with tag-based filtering
12
+ - **AI Agents**: Build Q&A systems with streaming support and citation tracking
13
+ - **Async-First**: Optimized for high-performance async operations
14
+ - **MCP Integration**: Expose and consume tools via Model Context Protocol (stdio/HTTP)
15
+ - **File Attachments**: Process and embed documents with built-in loaders
16
+ - **Extensible**: Abstract base classes for custom models, retrievers, and ingesters
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install amsdal-ml
22
+ ```
23
+
24
+ ### Requirements
25
+
26
+ - Python 3.11 or higher
27
+ - AMSDAL Framework 0.5.6+
28
+ - OpenAI API key (for default implementations)
29
+
30
+ ## Quick Start
31
+
32
+ ### 1. Configuration
33
+
34
+ Create a `.env` file in your project root:
35
+
36
+ ```env
37
+ OPENAI_API_KEY=sk-your-api-key-here
38
+ async_mode=true
39
+ ml_model_class=amsdal_ml.ml_models.openai_model.OpenAIModel
40
+ ml_retriever_class=amsdal_ml.ml_retrievers.openai_retriever.OpenAIRetriever
41
+ ml_ingesting_class=amsdal_ml.ml_ingesting.openai_ingesting.OpenAIIngesting
42
+ ```
43
+
44
+ Create a `config.yml` for AMSDAL connections:
45
+
46
+ ```yaml
47
+ application_name: my-ml-app
48
+ async_mode: true
49
+ connections:
50
+ - name: sqlite_state
51
+ backend: sqlite-state-async
52
+ credentials:
53
+ - db_path: ./warehouse/state.sqlite3
54
+ - check_same_thread: false
55
+ - name: lock
56
+ backend: amsdal_data.lock.implementations.thread_lock.ThreadLock
57
+ resources_config:
58
+ repository:
59
+ default: sqlite_state
60
+ lock: lock
61
+ ```
62
+
63
+ ### 2. Generate Embeddings
64
+
65
+ ```python
66
+ from amsdal_ml.ml_ingesting.openai_ingesting import OpenAIIngesting
67
+ from amsdal_ml.ml_config import ml_config
68
+
69
+ # Initialize ingesting
70
+ ingester = OpenAIIngesting(
71
+ model=MyModel,
72
+ embedding_field='embedding',
73
+ )
74
+
75
+ # Generate embeddings for an instance
76
+ instance = MyModel(content='Your text here')
77
+ embeddings = await ingester.agenerate_embeddings(instance)
78
+ await ingester.asave(embeddings, instance)
79
+ ```
80
+
81
+ ### 3. Semantic Search
82
+
83
+ ```python
84
+ from amsdal_ml.ml_retrievers.openai_retriever import OpenAIRetriever
85
+
86
+ retriever = OpenAIRetriever()
87
+
88
+ # Search for relevant content
89
+ results = await retriever.asimilarity_search(
90
+ query='What is machine learning?',
91
+ k=5,
92
+ include_tags=['documentation']
93
+ )
94
+
95
+ for chunk in results:
96
+ print(f'{chunk.object_class}:{chunk.object_id} - {chunk.raw_text}')
97
+ ```
98
+
99
+ ### 4. Build an AI Agent
100
+
101
+ ```python
102
+ from amsdal_ml.agents.default_qa_agent import DefaultQAAgent
103
+
104
+ agent = DefaultQAAgent()
105
+
106
+ # Ask questions
107
+ output = await agent.arun('Explain vector embeddings')
108
+ print(output.answer)
109
+ print(f'Used tools: {output.used_tools}')
110
+
111
+ # Stream responses
112
+ async for chunk in agent.astream('What is semantic search?'):
113
+ print(chunk, end='', flush=True)
114
+ ```
115
+
116
+ ## Architecture
117
+
118
+ ### Core Components
119
+
120
+ - **`MLModel`**: Abstract interface for LLM inference (invoke, stream, with attachments)
121
+ - **`MLIngesting`**: Generate text and embeddings from data objects with chunking
122
+ - **`MLRetriever`**: Semantic similarity search with tag-based filtering
123
+ - **`Agent`**: Q&A and task-oriented agents with streaming and citations
124
+ - **`EmbeddingModel`**: Database model storing 1536-dimensional vectors linked to source objects
125
+ - **MCP Server/Client**: Expose retrievers as tools or consume external MCP services
126
+
127
+ ### Configuration
128
+
129
+ All settings are managed via `MLConfig` in `.env`:
130
+
131
+ ```env
132
+ # Model Configuration
133
+ llm_model_name=gpt-4o
134
+ llm_temperature=0.0
135
+ embed_model_name=text-embedding-3-small
136
+
137
+ # Chunking Parameters
138
+ embed_max_depth=2
139
+ embed_max_chunks=10
140
+ embed_max_tokens_per_chunk=800
141
+
142
+ # Retrieval Settings
143
+ retriever_default_k=8
144
+ ```
145
+
146
+ ## Development
147
+
148
+ ### Setup
149
+
150
+ ```bash
151
+ # Install dependencies
152
+ pip install --upgrade uv hatch==1.14.2
153
+ hatch env create
154
+ hatch run sync
155
+ ```
156
+
157
+ ### Testing
158
+
159
+ ```bash
160
+ # Run all tests with coverage
161
+ hatch run cov
162
+
163
+ # Run specific tests
164
+ hatch run test tests/test_openai_model.py
165
+
166
+ # Run tests directly with pytest (verbose output)
167
+ pytest tests/ -v
168
+ ```
169
+
170
+ ### Code Quality
171
+
172
+ ```bash
173
+ # Run all checks (style + typing)
174
+ hatch run all
175
+
176
+ # Format code
177
+ hatch run fmt
178
+
179
+ # Type checking
180
+ hatch run typing
181
+ ```
182
+
183
+ ### AMSDAL CLI
184
+
185
+ ```bash
186
+ # Generate a new model
187
+ amsdal generate model MyModel --format py
188
+
189
+ # Generate property
190
+ amsdal generate property --model MyModel embedding_field
191
+
192
+ # Generate transaction
193
+ amsdal generate transaction ProcessEmbeddings
194
+
195
+ # Generate hook
196
+ amsdal generate hook --model MyModel on_create
197
+ ```
198
+
199
+ ## MCP Server
200
+
201
+ Run the retriever as an MCP server for integration with Claude Desktop or other MCP clients:
202
+
203
+ ```bash
204
+ python -m amsdal_ml.mcp_server.server_retriever_stdio \
205
+ --amsdal-config "$(echo '{"async_mode": true, ...}' | base64)"
206
+ ```
207
+
208
+ The server exposes a `search` tool for semantic search in your knowledge base.
209
+
210
+ ## License
211
+
212
+ See `amsdal_ml/Third-Party Materials - AMSDAL Dependencies - License Notices.md` for dependency licenses.
213
+
214
+ ## Links
215
+
216
+ - [AMSDAL Framework](https://github.com/amsdal/amsdal)
217
+ - [Documentation](https://docs.amsdal.com)
218
+ - [Issue Tracker](https://github.com/amsdal/amsdal_ml/issues)
@@ -0,0 +1,180 @@
1
+ # Release Guide
2
+
3
+ Follow these steps each time you release a new version of `amsdal-ml`.
4
+
5
+ ---
6
+
7
+ ## Step 0: Check Working Directory
8
+
9
+ Before starting, ensure you have a clean working directory:
10
+
11
+ ```bash
12
+ git status
13
+ ```
14
+
15
+ **If you have uncommitted changes:**
16
+ 1. Review the changes: `git diff`
17
+ 2. Decide what to do:
18
+ - **Commit them now** if they should be part of this release:
19
+ ```bash
20
+ git add .
21
+ git commit -m "Your commit message"
22
+ git push origin main
23
+ ```
24
+ - **Stash them** if they're unrelated to this release:
25
+ ```bash
26
+ git stash
27
+ # After release, restore with: git stash pop
28
+ ```
29
+ - **Discard them** if they're not needed:
30
+ ```bash
31
+ git restore . # Be careful - this cannot be undone!
32
+ ```
33
+
34
+ ---
35
+
36
+ ## Step 1: Update Version Number
37
+
38
+ Edit `amsdal_ml/__about__.py`:
39
+
40
+ ```python
41
+ __version__ = '0.1.4' # Change to your new version
42
+ ```
43
+
44
+ **Version format**: `MAJOR.MINOR.PATCH`
45
+ - PATCH: Bug fixes (e.g., `0.1.3` → `0.1.4`)
46
+ - MINOR: New features (e.g., `0.1.4` → `0.2.0`)
47
+ - MAJOR: Breaking changes (e.g., `0.2.0` → `1.0.0`)
48
+
49
+ ---
50
+
51
+ ## Step 2: Update Changelogs
52
+
53
+ ### 2a. Update `latest-changelogs.md`
54
+
55
+ Replace the entire content of the file with your new release notes:
56
+
57
+ ```markdown
58
+ ## [v0.1.4](https://pypi.org/project/amsdal_ml/0.1.4/) - 2025-10-15
59
+
60
+ ### Description of changes
61
+
62
+ - First change
63
+ - Second change
64
+ - Third change
65
+ ```
66
+
67
+ ### 2b. Update `change-logs.md`
68
+
69
+ Prepend the same content to the top of the file (keep existing entries below).
70
+
71
+ ---
72
+
73
+ ## Step 3: Run Quality Checks
74
+
75
+ ```bash
76
+ hatch run all
77
+ hatch run cov
78
+ ```
79
+
80
+ All checks must pass before continuing.
81
+
82
+ ---
83
+
84
+ ## Step 4: Create Release Branch
85
+
86
+ ```bash
87
+ git checkout -b release/v0.1.4
88
+ ```
89
+
90
+ ---
91
+
92
+ ## Step 5: Commit and Push
93
+
94
+ ```bash
95
+ git add amsdal_ml/__about__.py latest-changelogs.md change-logs.md
96
+ git commit -m "Release v0.1.4"
97
+ git push origin release/v0.1.4
98
+ ```
99
+
100
+ ---
101
+
102
+ ## Step 6: Create Pull Request
103
+
104
+ 1. Go to: https://github.com/amsdal/amsdal_ml/pulls
105
+ 2. Click "New pull request"
106
+ 3. Set base: `main` ← compare: `release/v0.1.4`
107
+ 4. Title: `Release v0.1.4`
108
+ 5. Add description with changelog content
109
+ 6. Create and merge the PR
110
+
111
+ ---
112
+
113
+ ## Step 7: Checkout Main and Pull
114
+
115
+ ```bash
116
+ git checkout main
117
+ git pull origin main
118
+ ```
119
+
120
+ ---
121
+
122
+ ## Step 8: Create and Push Tag
123
+
124
+ ```bash
125
+ git tag -a v0.1.4 -m "Release v0.1.4"
126
+ git push origin v0.1.4
127
+ ```
128
+
129
+ **Important**: Tag must start with `v` (e.g., `v0.1.4`)
130
+
131
+ ---
132
+
133
+ ## Step 9: Monitor CI/CD
134
+
135
+ Go to: https://github.com/amsdal/amsdal_ml/actions
136
+
137
+ Wait for all jobs to complete:
138
+ 1. ✅ License check
139
+ 2. ✅ Build
140
+ 3. ✅ Publish to PyPI
141
+ 4. ✅ Create GitHub release
142
+
143
+ ---
144
+
145
+ ## Step 10: Verify Release
146
+
147
+ ### Check PyPI
148
+ https://pypi.org/project/amsdal-ml/
149
+
150
+ ### Check GitHub Releases
151
+ https://github.com/amsdal/amsdal_ml/releases
152
+
153
+ ### Test Installation
154
+ ```bash
155
+ pip install --upgrade amsdal-ml
156
+ python -c "import amsdal_ml; print(amsdal_ml.__version__)"
157
+ ```
158
+
159
+ ---
160
+
161
+ ## Done! ✅
162
+
163
+ ---
164
+
165
+ ## Troubleshooting
166
+
167
+ **If CI fails:**
168
+ 1. Check logs at https://github.com/amsdal/amsdal_ml/actions
169
+ 2. Fix the issue
170
+ 3. Delete and recreate the tag:
171
+ ```bash
172
+ git tag -d v0.1.4
173
+ git push origin :refs/tags/v0.1.4
174
+ git tag -a v0.1.4 -m "Release v0.1.4"
175
+ git push origin v0.1.4
176
+ ```
177
+
178
+ **If you need to rollback:**
179
+ - Release a new patch version with the fix (recommended)
180
+ - Or yank the release on PyPI from the project's release management page (`twine` only uploads; it cannot yank a release)
@@ -0,0 +1 @@
1
+ __version__ = '0.1.4'
@@ -27,7 +27,16 @@ class RetrieverArgs(BaseModel):
27
27
  exclude_tags: Optional[list[str]] = None
28
28
 
29
29
 
30
- _retriever = OpenAIRetriever()
30
+ class _RetrieverSingleton:
31
+ """Singleton holder for lazy retriever initialization."""
32
+ _instance: Optional[OpenAIRetriever] = None
33
+
34
+ @classmethod
35
+ def get(cls) -> OpenAIRetriever:
36
+ """Lazy initialization of retriever to ensure env vars are loaded."""
37
+ if cls._instance is None:
38
+ cls._instance = OpenAIRetriever()
39
+ return cls._instance
31
40
 
32
41
 
33
42
  async def retriever_search(
@@ -39,7 +48,8 @@ async def retriever_search(
39
48
  logging.info(
40
49
  f"retriever_search called with query={query}, k={k}, include_tags={include_tags}, exclude_tags={exclude_tags}"
41
50
  )
42
- chunks = await _retriever.asimilarity_search(
51
+ retriever = _RetrieverSingleton.get()
52
+ chunks = await retriever.asimilarity_search(
43
53
  query=query,
44
54
  k=k,
45
55
  include_tags=include_tags,
@@ -94,7 +94,7 @@ class StdioClient(ToolClient):
94
94
 
95
95
  if not self._persist:
96
96
  async with AsyncExitStack() as stack:
97
- params = StdioServerParameters(command=self._command, args=self._args)
97
+ params = StdioServerParameters(command=self._command, args=self._args, env=os.environ.copy())
98
98
  rx, tx = await stack.enter_async_context(stdio_client(params))
99
99
  s = await stack.enter_async_context(ClientSession(rx, tx))
100
100
  await s.initialize()
@@ -123,7 +123,7 @@ class StdioClient(ToolClient):
123
123
  async def call(self, tool_name: str, args: dict[str, Any], *, timeout: float | None = None) -> Any:
124
124
  if not self._persist:
125
125
  async with AsyncExitStack() as stack:
126
- params = StdioServerParameters(command=self._command, args=self._args)
126
+ params = StdioServerParameters(command=self._command, args=self._args, env=os.environ.copy())
127
127
  rx, tx = await stack.enter_async_context(stdio_client(params))
128
128
  s = await stack.enter_async_context(ClientSession(rx, tx))
129
129
  await s.initialize()
@@ -1,3 +1,13 @@
1
+ ## [v0.1.4](https://pypi.org/project/amsdal_ml/0.1.4/) - 2025-10-15
2
+
3
+ ### Fixed retriever initialization in K8s environments
4
+
5
+ - Made OpenAIRetriever initialization lazy so that env vars are loaded before the retriever is created
6
+ - Added missing env parameter to StdioServerParameters in StdioClient for non-persistent sessions
7
+ - Environment variables now properly passed to MCP stdio subprocesses
8
+ - Updated README.md to be production-ready
9
+ - Added RELEASE.md with step-by-step release guide
10
+
1
11
  ## [v0.1.3](https://pypi.org/project/amsdal_ml/0.1.3/) - 2025-10-13
2
12
 
3
13
  ### Pass env vars into stdio server
@@ -0,0 +1,9 @@
1
+ ## [v0.1.4](https://pypi.org/project/amsdal_ml/0.1.4/) - 2025-10-15
2
+
3
+ ### Fixed retriever initialization in K8s environments
4
+
5
+ - Made OpenAIRetriever initialization lazy so that env vars are loaded before the retriever is created
6
+ - Added missing env parameter to StdioServerParameters in StdioClient for non-persistent sessions
7
+ - Environment variables now properly passed to MCP stdio subprocesses
8
+ - Updated README.md to be production-ready
9
+ - Added RELEASE.md with step-by-step release guide
amsdal_ml-0.1.3/PKG-INFO DELETED
@@ -1,69 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: amsdal_ml
3
- Version: 0.1.3
4
- Summary: amsdal_ml plugin for AMSDAL Framework
5
- Requires-Python: >=3.11
6
- Requires-Dist: aiohttp==3.12.15
7
- Requires-Dist: amsdal-cli>=0.5.7
8
- Requires-Dist: amsdal-data>=0.5.9
9
- Requires-Dist: amsdal-models>=0.5.9
10
- Requires-Dist: amsdal-utils>=0.5.4
11
- Requires-Dist: amsdal>=0.5.6
12
- Requires-Dist: mcp>=0.1
13
- Requires-Dist: openai==1.100.2
14
- Requires-Dist: pydantic-settings==2.10.1
15
- Requires-Dist: pydantic==2.11.7
16
- Description-Content-Type: text/markdown
17
-
18
- # amsdal-ml
19
-
20
- This plugin extends the AMSDAL Framework with machine learning utilities,
21
- including custom models for embeddings, properties for ML metadata, and
22
- hooks for working with vector search and AI-driven features.
23
-
24
- ## Plugin Structure
25
-
26
- - `src/models/` - Contains model definitions in Python format
27
- - `src/transactions/` - Contains transaction definitions
28
- - `pyproject.toml` - Plugin configuration file
29
- - `config.yml` - Configuration for connections
30
-
31
- ## Installing this Plugin
32
-
33
- To use this plugin in an AMSDAL application:
34
-
35
- 1. Copy the plugin directory to your AMSDAL application
36
- 2. Import the models and transactions as needed
37
- 3. Register the plugin in your application configuration
38
-
39
- ## Development
40
-
41
- This plugin uses sync mode.
42
-
43
- ### Adding Models
44
-
45
- ```bash
46
- amsdal generate model ModelName --format py
47
- ```
48
-
49
- ### Adding Properties
50
-
51
- ```bash
52
- amsdal generate property --model ModelName property_name
53
- ```
54
-
55
- ### Adding Transactions
56
-
57
- ```bash
58
- amsdal generate transaction TransactionName
59
- ```
60
-
61
- ### Adding Hooks
62
-
63
- ```bash
64
- amsdal generate hook --model ModelName on_create
65
- ```
66
-
67
- ## Testing
68
-
69
- Test your plugin by integrating it with an AMSDAL application and running the application's test suite.
amsdal_ml-0.1.3/README.md DELETED
@@ -1,52 +0,0 @@
1
- # amsdal-ml
2
-
3
- This plugin extends the AMSDAL Framework with machine learning utilities,
4
- including custom models for embeddings, properties for ML metadata, and
5
- hooks for working with vector search and AI-driven features.
6
-
7
- ## Plugin Structure
8
-
9
- - `src/models/` - Contains model definitions in Python format
10
- - `src/transactions/` - Contains transaction definitions
11
- - `pyproject.toml` - Plugin configuration file
12
- - `config.yml` - Configuration for connections
13
-
14
- ## Installing this Plugin
15
-
16
- To use this plugin in an AMSDAL application:
17
-
18
- 1. Copy the plugin directory to your AMSDAL application
19
- 2. Import the models and transactions as needed
20
- 3. Register the plugin in your application configuration
21
-
22
- ## Development
23
-
24
- This plugin uses sync mode.
25
-
26
- ### Adding Models
27
-
28
- ```bash
29
- amsdal generate model ModelName --format py
30
- ```
31
-
32
- ### Adding Properties
33
-
34
- ```bash
35
- amsdal generate property --model ModelName property_name
36
- ```
37
-
38
- ### Adding Transactions
39
-
40
- ```bash
41
- amsdal generate transaction TransactionName
42
- ```
43
-
44
- ### Adding Hooks
45
-
46
- ```bash
47
- amsdal generate hook --model ModelName on_create
48
- ```
49
-
50
- ## Testing
51
-
52
- Test your plugin by integrating it with an AMSDAL application and running the application's test suite.
@@ -1 +0,0 @@
1
- __version__ = '0.1.3'
@@ -1,6 +0,0 @@
1
- ## [v0.1.3](https://pypi.org/project/amsdal_ml/0.1.3/) - 2025-10-13
2
-
3
- ### Pass env vars into stdio server
4
-
5
- - Pass env vars into stdio server
6
- - cleanup of app.py
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes