amsdal_ml 0.1.4__tar.gz → 0.2.1__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.github/workflows/ci.yml +18 -3
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.gitignore +2 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/PKG-INFO +61 -3
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/README.md +57 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/Third-Party Materials - AMSDAL Dependencies - License Notices.md +617 -0
- amsdal_ml-0.2.1/amsdal_ml/__about__.py +1 -0
- amsdal_ml-0.2.1/amsdal_ml/agents/__init__.py +13 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/agents/agent.py +5 -7
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/agents/default_qa_agent.py +108 -143
- amsdal_ml-0.2.1/amsdal_ml/agents/functional_calling_agent.py +233 -0
- amsdal_ml-0.2.1/amsdal_ml/agents/mcp_client_tool.py +46 -0
- amsdal_ml-0.2.1/amsdal_ml/agents/python_tool.py +86 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/agents/retriever_tool.py +5 -6
- amsdal_ml-0.2.1/amsdal_ml/agents/tool_adapters.py +98 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/fileio/base_loader.py +7 -5
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/fileio/openai_loader.py +16 -17
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/mcp_client/base.py +2 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/mcp_client/http_client.py +7 -1
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/mcp_client/stdio_client.py +19 -16
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/mcp_server/server_retriever_stdio.py +8 -11
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/__init__.py +29 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_ingesting/default_ingesting.py +49 -51
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/embedders/__init__.py +4 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/embedders/embedder.py +12 -0
- amsdal_ml-0.1.4/amsdal_ml/ml_retrievers/openai_retriever.py → amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/embedders/openai_embedder.py +6 -15
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_ingesting/embedding_data.py +3 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/loaders/__init__.py +6 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/loaders/folder_loader.py +52 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/loaders/loader.py +28 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/loaders/pdf_loader.py +136 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/loaders/text_loader.py +44 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/model_ingester.py +278 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/pipeline.py +131 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/pipeline_interface.py +31 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/processors/__init__.py +4 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/processors/cleaner.py +14 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/processors/text_cleaner.py +42 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/splitters/__init__.py +4 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/splitters/splitter.py +15 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/splitters/token_splitter.py +85 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/stores/__init__.py +4 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/stores/embedding_data.py +63 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/stores/store.py +22 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/types.py +40 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_models/models.py +179 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_models/openai_model.py +679 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_models/utils.py +7 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_retrievers/__init__.py +17 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_retrievers/adapters.py +93 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_retrievers/default_retriever.py +11 -1
- amsdal_ml-0.2.1/amsdal_ml/ml_retrievers/openai_retriever.py +59 -0
- amsdal_ml-0.2.1/amsdal_ml/ml_retrievers/query_retriever.py +487 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_retrievers/retriever.py +12 -0
- amsdal_ml-0.2.1/amsdal_ml/models/embedding_model.py +21 -0
- amsdal_ml-0.2.1/amsdal_ml/prompts/__init__.py +77 -0
- amsdal_ml-0.2.1/amsdal_ml/prompts/database_query_agent.prompt +14 -0
- amsdal_ml-0.2.1/amsdal_ml/prompts/functional_calling_agent_base.prompt +9 -0
- amsdal_ml-0.2.1/amsdal_ml/prompts/nl_query_filter.prompt +318 -0
- {amsdal_ml-0.1.4/amsdal_ml/agents/promts → amsdal_ml-0.2.1/amsdal_ml/prompts}/react_chat.prompt +17 -8
- amsdal_ml-0.2.1/amsdal_ml/utils/__init__.py +5 -0
- amsdal_ml-0.2.1/amsdal_ml/utils/query_utils.py +189 -0
- amsdal_ml-0.2.1/change-logs.md +57 -0
- amsdal_ml-0.2.1/docker-compose.tests.yml +16 -0
- amsdal_ml-0.2.1/latest-changelogs.md +7 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/pyproject.toml +9 -5
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/tests/agents_tests/test_arun.py +2 -2
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/tests/agents_tests/test_astream.py +7 -7
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/tests/agents_tests/test_astream_final_only.py +5 -5
- amsdal_ml-0.2.1/tests/agents_tests/test_fakes.py +272 -0
- amsdal_ml-0.2.1/tests/agents_tests/test_functional_calling_agent.py +241 -0
- amsdal_ml-0.2.1/tests/agents_tests/test_qa_agent_with_nlq_tool.py +268 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/tests/agents_tests/test_tool_call_arguments_async.py +16 -6
- amsdal_ml-0.2.1/tests/conftest.py +220 -0
- amsdal_ml-0.2.1/tests/fixtures/models/author.py +15 -0
- amsdal_ml-0.2.1/tests/fixtures/models/book.py +16 -0
- amsdal_ml-0.2.1/tests/fixtures/models/category.py +16 -0
- amsdal_ml-0.2.1/tests/fixtures/models/order.py +18 -0
- amsdal_ml-0.2.1/tests/fixtures/models/product.py +18 -0
- amsdal_ml-0.2.1/tests/fixtures/models/user.py +18 -0
- amsdal_ml-0.2.1/tests/fixtures/models/vehicle.py +32 -0
- amsdal_ml-0.2.1/tests/ingesting/test_folder_loader_and_rag.py +120 -0
- amsdal_ml-0.2.1/tests/ingesting/test_ingestion_components.py +232 -0
- amsdal_ml-0.2.1/tests/ingesting/test_model_ingester.py +140 -0
- amsdal_ml-0.2.1/tests/nlqueryretriever_tests/mock_tests/__init__.py +0 -0
- amsdal_ml-0.2.1/tests/nlqueryretriever_tests/mock_tests/test_retriever_mock.py +651 -0
- amsdal_ml-0.2.1/tests/nlqueryretriever_tests/models_tests/__init__.py +0 -0
- amsdal_ml-0.2.1/tests/nlqueryretriever_tests/models_tests/test_nl_query_models.py +305 -0
- amsdal_ml-0.2.1/tests/nlqueryretriever_tests/schema_tests/__init__.py +0 -0
- amsdal_ml-0.2.1/tests/nlqueryretriever_tests/schema_tests/test_comprehensive_schema.py +348 -0
- amsdal_ml-0.2.1/tests/nlqueryretriever_tests/schema_tests/test_nested_list_skipping.py +135 -0
- amsdal_ml-0.2.1/tests/test_files/.gitkeep +0 -0
- amsdal_ml-0.2.1/tests/test_files/pdf/Aspida.pdf +0 -0
- amsdal_ml-0.2.1/tests/test_files/pdf/Nassau.pdf +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/tests/test_openai_model.py +64 -2
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/uv.lock +1030 -768
- amsdal_ml-0.1.4/amsdal_ml/__about__.py +0 -1
- amsdal_ml-0.1.4/amsdal_ml/agents/promts/__init__.py +0 -58
- amsdal_ml-0.1.4/amsdal_ml/ml_models/models.py +0 -87
- amsdal_ml-0.1.4/amsdal_ml/ml_models/openai_model.py +0 -371
- amsdal_ml-0.1.4/amsdal_ml/models/embedding_model.py +0 -21
- amsdal_ml-0.1.4/change-logs.md +0 -34
- amsdal_ml-0.1.4/latest-changelogs.md +0 -9
- amsdal_ml-0.1.4/tests/agents_tests/test_fakes.py +0 -173
- amsdal_ml-0.1.4/tests/conftest.py +0 -105
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.amsdal/.dependencies +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.amsdal/.environment +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.amsdal/.secrets +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.amsdal-cli +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.github/workflows/release.yml +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.github/workflows/tag_check.yml +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/CLAUDE.md +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/RELEASE.md +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/__init__.py +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/app.py +0 -0
- {amsdal_ml-0.1.4/amsdal_ml/agents → amsdal_ml-0.2.1/amsdal_ml/fileio}/__init__.py +0 -0
- {amsdal_ml-0.1.4/amsdal_ml/fileio → amsdal_ml-0.2.1/amsdal_ml/mcp_client}/__init__.py +0 -0
- {amsdal_ml-0.1.4/amsdal_ml/mcp_client → amsdal_ml-0.2.1/amsdal_ml/mcp_server}/__init__.py +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/migrations/0000_initial.py +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_config.py +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_ingesting/ingesting.py +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_ingesting/openai_ingesting.py +0 -0
- {amsdal_ml-0.1.4/amsdal_ml/mcp_server → amsdal_ml-0.2.1/amsdal_ml/ml_models}/__init__.py +0 -0
- {amsdal_ml-0.1.4/amsdal_ml/ml_ingesting → amsdal_ml-0.2.1/amsdal_ml/models}/__init__.py +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/py.typed +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/config.yml +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/license_check.py +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/scripts/release.sh +0 -0
- {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/scripts/tag_check.sh +0 -0
- {amsdal_ml-0.1.4/amsdal_ml/ml_models → amsdal_ml-0.2.1/tests}/__init__.py +0 -0
- {amsdal_ml-0.1.4/amsdal_ml/ml_retrievers → amsdal_ml-0.2.1/tests/agents_tests}/__init__.py +0 -0
- {amsdal_ml-0.1.4/amsdal_ml → amsdal_ml-0.2.1/tests/fixtures}/models/__init__.py +0 -0
- {amsdal_ml-0.1.4/tests → amsdal_ml-0.2.1/tests/ingesting}/__init__.py +0 -0
- {amsdal_ml-0.1.4/tests/agents_tests → amsdal_ml-0.2.1/tests/nlqueryretriever_tests}/__init__.py +0 -0
|
@@ -23,7 +23,7 @@ jobs:
|
|
|
23
23
|
python license_check.py
|
|
24
24
|
|
|
25
25
|
test-lint:
|
|
26
|
-
name: Run tests and check style
|
|
26
|
+
name: Run tests and check style (Python ${{ matrix.python-version }}, ${{ matrix.database-backend }})
|
|
27
27
|
needs: [license-check]
|
|
28
28
|
runs-on: self-hosted
|
|
29
29
|
strategy:
|
|
@@ -31,6 +31,21 @@ jobs:
|
|
|
31
31
|
fail-fast: false
|
|
32
32
|
matrix:
|
|
33
33
|
python-version: ["3.11", "3.12"]
|
|
34
|
+
database-backend: ["sqlite", "postgres"]
|
|
35
|
+
services:
|
|
36
|
+
postgres:
|
|
37
|
+
image: pgvector/pgvector:pg16
|
|
38
|
+
env:
|
|
39
|
+
POSTGRES_USER: postgres
|
|
40
|
+
POSTGRES_PASSWORD: example
|
|
41
|
+
POSTGRES_DB: postgres
|
|
42
|
+
ports:
|
|
43
|
+
- 5432:5432
|
|
44
|
+
options: >-
|
|
45
|
+
--health-cmd pg_isready
|
|
46
|
+
--health-interval 10s
|
|
47
|
+
--health-timeout 5s
|
|
48
|
+
--health-retries 5
|
|
34
49
|
env:
|
|
35
50
|
PYTHON: ${{ matrix.python-version }}
|
|
36
51
|
DEPS: yes
|
|
@@ -54,9 +69,9 @@ jobs:
|
|
|
54
69
|
hatch run sync
|
|
55
70
|
|
|
56
71
|
- name: Run style checks
|
|
57
|
-
if: always()
|
|
72
|
+
if: always() && matrix.database-backend == 'sqlite'
|
|
58
73
|
run: hatch run all
|
|
59
74
|
|
|
60
75
|
- name: Run tests
|
|
61
76
|
if: always()
|
|
62
|
-
run: hatch run cov tests/
|
|
77
|
+
run: hatch run cov tests/ -- --database_backend=${{ matrix.database-backend }}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: amsdal_ml
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: amsdal_ml plugin for AMSDAL Framework
|
|
5
5
|
Requires-Python: >=3.11
|
|
6
6
|
Requires-Dist: aiohttp==3.12.15
|
|
@@ -11,8 +11,9 @@ Requires-Dist: amsdal-utils>=0.5.4
|
|
|
11
11
|
Requires-Dist: amsdal>=0.5.6
|
|
12
12
|
Requires-Dist: mcp>=0.1
|
|
13
13
|
Requires-Dist: openai==1.100.2
|
|
14
|
-
Requires-Dist: pydantic-settings
|
|
15
|
-
Requires-Dist: pydantic
|
|
14
|
+
Requires-Dist: pydantic-settings~=2.12
|
|
15
|
+
Requires-Dist: pydantic~=2.12
|
|
16
|
+
Requires-Dist: pymupdf>=1.24.10
|
|
16
17
|
Description-Content-Type: text/markdown
|
|
17
18
|
|
|
18
19
|
# AMSDAL ML
|
|
@@ -130,6 +131,53 @@ async for chunk in agent.astream('What is semantic search?'):
|
|
|
130
131
|
print(chunk, end='', flush=True)
|
|
131
132
|
```
|
|
132
133
|
|
|
134
|
+
### 5. Functional Calling Agent with Python Tools
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
from amsdal_ml.agents.functional_calling_agent import FunctionalCallingAgent
|
|
138
|
+
from amsdal_ml.agents.python_tool import PythonTool
|
|
139
|
+
from amsdal_ml.ml_models.openai_model import OpenAIModel
|
|
140
|
+
|
|
141
|
+
llm = OpenAIModel()
|
|
142
|
+
agent = FunctionalCallingAgent(model=llm, tools=[search_tool, render_tool])
|
|
143
|
+
result = await agent.arun(user_query="Find products with price > 100", history=[])
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### 6. Natural Language Query Retriever
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
from amsdal_ml.ml_retrievers.query_retriever import NLQueryRetriever
|
|
150
|
+
|
|
151
|
+
retriever = NLQueryRetriever(llm=llm, queryset=Product.objects.all())
|
|
152
|
+
documents = await retriever.invoke("Show me red products", limit=10)
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### 7. Document Ingestion Pipeline
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from amsdal_ml.ml_ingesting import ModelIngester
|
|
159
|
+
from amsdal_ml.ml_ingesting.pipeline import DefaultIngestionPipeline
|
|
160
|
+
from amsdal_ml.ml_ingesting.loaders.pdf_loader import PdfLoader
|
|
161
|
+
from amsdal_ml.ml_ingesting.processors.text_cleaner import TextCleaner
|
|
162
|
+
from amsdal_ml.ml_ingesting.splitters.token_splitter import TokenSplitter
|
|
163
|
+
from amsdal_ml.ml_ingesting.embedders.openai_embedder import OpenAIEmbedder
|
|
164
|
+
from amsdal_ml.ml_ingesting.stores.embedding_data import EmbeddingDataStore
|
|
165
|
+
|
|
166
|
+
pipeline = DefaultIngestionPipeline(
|
|
167
|
+
loader=PdfLoader(), # Uses pymupdf for PDF processing
|
|
168
|
+
cleaner=TextCleaner(),
|
|
169
|
+
splitter=TokenSplitter(max_tokens=800, overlap_tokens=80),
|
|
170
|
+
embedder=OpenAIEmbedder(),
|
|
171
|
+
store=EmbeddingDataStore(),
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
ingester = ModelIngester(
|
|
175
|
+
pipeline=pipeline,
|
|
176
|
+
base_tags=["document"],
|
|
177
|
+
base_metadata={"source": "pdf"},
|
|
178
|
+
)
|
|
179
|
+
```
|
|
180
|
+
|
|
133
181
|
## Architecture
|
|
134
182
|
|
|
135
183
|
### Core Components
|
|
@@ -139,6 +187,16 @@ async for chunk in agent.astream('What is semantic search?'):
|
|
|
139
187
|
- **`MLRetriever`**: Semantic similarity search with tag-based filtering
|
|
140
188
|
- **`Agent`**: Q&A and task-oriented agents with streaming and citations
|
|
141
189
|
- **`EmbeddingModel`**: Database model storing 1536-dimensional vectors linked to source objects
|
|
190
|
+
- **`PythonTool`**: Tool for executing Python functions within agents
|
|
191
|
+
- **`FunctionalCallingAgent`**: Agent specialized in functional calling with configurable tools
|
|
192
|
+
- **`NLQueryRetriever`**: Retriever for natural language queries on AMSDAL querysets
|
|
193
|
+
- **`DefaultIngestionPipeline`**: Pipeline for document ingestion including loader, cleaner, splitter, embedder, and store
|
|
194
|
+
- **`ModelIngester`**: High-level ingester for processing models with customizable pipelines and metadata
|
|
195
|
+
- **`PdfLoader`**: Document loader using pymupdf for PDF processing
|
|
196
|
+
- **`TextCleaner`**: Processor for cleaning and normalizing text
|
|
197
|
+
- **`TokenSplitter`**: Splitter for dividing text into chunks based on token count
|
|
198
|
+
- **`OpenAIEmbedder`**: Embedder for generating embeddings via OpenAI API
|
|
199
|
+
- **`EmbeddingDataStore`**: Store for saving embedding data linked to source objects
|
|
142
200
|
- **MCP Server/Client**: Expose retrievers as tools or consume external MCP services
|
|
143
201
|
|
|
144
202
|
### Configuration
|
|
@@ -113,6 +113,53 @@ async for chunk in agent.astream('What is semantic search?'):
|
|
|
113
113
|
print(chunk, end='', flush=True)
|
|
114
114
|
```
|
|
115
115
|
|
|
116
|
+
### 5. Functional Calling Agent with Python Tools
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from amsdal_ml.agents.functional_calling_agent import FunctionalCallingAgent
|
|
120
|
+
from amsdal_ml.agents.python_tool import PythonTool
|
|
121
|
+
from amsdal_ml.ml_models.openai_model import OpenAIModel
|
|
122
|
+
|
|
123
|
+
llm = OpenAIModel()
|
|
124
|
+
agent = FunctionalCallingAgent(model=llm, tools=[search_tool, render_tool])
|
|
125
|
+
result = await agent.arun(user_query="Find products with price > 100", history=[])
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### 6. Natural Language Query Retriever
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
from amsdal_ml.ml_retrievers.query_retriever import NLQueryRetriever
|
|
132
|
+
|
|
133
|
+
retriever = NLQueryRetriever(llm=llm, queryset=Product.objects.all())
|
|
134
|
+
documents = await retriever.invoke("Show me red products", limit=10)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### 7. Document Ingestion Pipeline
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
from amsdal_ml.ml_ingesting import ModelIngester
|
|
141
|
+
from amsdal_ml.ml_ingesting.pipeline import DefaultIngestionPipeline
|
|
142
|
+
from amsdal_ml.ml_ingesting.loaders.pdf_loader import PdfLoader
|
|
143
|
+
from amsdal_ml.ml_ingesting.processors.text_cleaner import TextCleaner
|
|
144
|
+
from amsdal_ml.ml_ingesting.splitters.token_splitter import TokenSplitter
|
|
145
|
+
from amsdal_ml.ml_ingesting.embedders.openai_embedder import OpenAIEmbedder
|
|
146
|
+
from amsdal_ml.ml_ingesting.stores.embedding_data import EmbeddingDataStore
|
|
147
|
+
|
|
148
|
+
pipeline = DefaultIngestionPipeline(
|
|
149
|
+
loader=PdfLoader(), # Uses pymupdf for PDF processing
|
|
150
|
+
cleaner=TextCleaner(),
|
|
151
|
+
splitter=TokenSplitter(max_tokens=800, overlap_tokens=80),
|
|
152
|
+
embedder=OpenAIEmbedder(),
|
|
153
|
+
store=EmbeddingDataStore(),
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
ingester = ModelIngester(
|
|
157
|
+
pipeline=pipeline,
|
|
158
|
+
base_tags=["document"],
|
|
159
|
+
base_metadata={"source": "pdf"},
|
|
160
|
+
)
|
|
161
|
+
```
|
|
162
|
+
|
|
116
163
|
## Architecture
|
|
117
164
|
|
|
118
165
|
### Core Components
|
|
@@ -122,6 +169,16 @@ async for chunk in agent.astream('What is semantic search?'):
|
|
|
122
169
|
- **`MLRetriever`**: Semantic similarity search with tag-based filtering
|
|
123
170
|
- **`Agent`**: Q&A and task-oriented agents with streaming and citations
|
|
124
171
|
- **`EmbeddingModel`**: Database model storing 1536-dimensional vectors linked to source objects
|
|
172
|
+
- **`PythonTool`**: Tool for executing Python functions within agents
|
|
173
|
+
- **`FunctionalCallingAgent`**: Agent specialized in functional calling with configurable tools
|
|
174
|
+
- **`NLQueryRetriever`**: Retriever for natural language queries on AMSDAL querysets
|
|
175
|
+
- **`DefaultIngestionPipeline`**: Pipeline for document ingestion including loader, cleaner, splitter, embedder, and store
|
|
176
|
+
- **`ModelIngester`**: High-level ingester for processing models with customizable pipelines and metadata
|
|
177
|
+
- **`PdfLoader`**: Document loader using pymupdf for PDF processing
|
|
178
|
+
- **`TextCleaner`**: Processor for cleaning and normalizing text
|
|
179
|
+
- **`TokenSplitter`**: Splitter for dividing text into chunks based on token count
|
|
180
|
+
- **`OpenAIEmbedder`**: Embedder for generating embeddings via OpenAI API
|
|
181
|
+
- **`EmbeddingDataStore`**: Store for saving embedding data linked to source objects
|
|
125
182
|
- **MCP Server/Client**: Expose retrievers as tools or consume external MCP services
|
|
126
183
|
|
|
127
184
|
### Configuration
|