rakam-systems-vectorstore 0.1.1rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rakam_systems_vectorstore/MANIFEST.in +26 -0
- rakam_systems_vectorstore/README.md +1071 -0
- rakam_systems_vectorstore/__init__.py +93 -0
- rakam_systems_vectorstore/components/__init__.py +0 -0
- rakam_systems_vectorstore/components/chunker/__init__.py +19 -0
- rakam_systems_vectorstore/components/chunker/advanced_chunker.py +1019 -0
- rakam_systems_vectorstore/components/chunker/text_chunker.py +154 -0
- rakam_systems_vectorstore/components/embedding_model/__init__.py +0 -0
- rakam_systems_vectorstore/components/embedding_model/configurable_embeddings.py +546 -0
- rakam_systems_vectorstore/components/embedding_model/openai_embeddings.py +259 -0
- rakam_systems_vectorstore/components/loader/__init__.py +31 -0
- rakam_systems_vectorstore/components/loader/adaptive_loader.py +512 -0
- rakam_systems_vectorstore/components/loader/code_loader.py +699 -0
- rakam_systems_vectorstore/components/loader/doc_loader.py +812 -0
- rakam_systems_vectorstore/components/loader/eml_loader.py +556 -0
- rakam_systems_vectorstore/components/loader/html_loader.py +626 -0
- rakam_systems_vectorstore/components/loader/md_loader.py +622 -0
- rakam_systems_vectorstore/components/loader/odt_loader.py +750 -0
- rakam_systems_vectorstore/components/loader/pdf_loader.py +771 -0
- rakam_systems_vectorstore/components/loader/pdf_loader_light.py +723 -0
- rakam_systems_vectorstore/components/loader/tabular_loader.py +597 -0
- rakam_systems_vectorstore/components/vectorstore/__init__.py +0 -0
- rakam_systems_vectorstore/components/vectorstore/apps.py +10 -0
- rakam_systems_vectorstore/components/vectorstore/configurable_pg_vector_store.py +1661 -0
- rakam_systems_vectorstore/components/vectorstore/faiss_vector_store.py +878 -0
- rakam_systems_vectorstore/components/vectorstore/migrations/0001_initial.py +55 -0
- rakam_systems_vectorstore/components/vectorstore/migrations/__init__.py +0 -0
- rakam_systems_vectorstore/components/vectorstore/models.py +10 -0
- rakam_systems_vectorstore/components/vectorstore/pg_models.py +97 -0
- rakam_systems_vectorstore/components/vectorstore/pg_vector_store.py +827 -0
- rakam_systems_vectorstore/config.py +266 -0
- rakam_systems_vectorstore/core.py +8 -0
- rakam_systems_vectorstore/pyproject.toml +113 -0
- rakam_systems_vectorstore/server/README.md +290 -0
- rakam_systems_vectorstore/server/__init__.py +20 -0
- rakam_systems_vectorstore/server/mcp_server_vector.py +325 -0
- rakam_systems_vectorstore/setup.py +103 -0
- rakam_systems_vectorstore-0.1.1rc7.dist-info/METADATA +370 -0
- rakam_systems_vectorstore-0.1.1rc7.dist-info/RECORD +40 -0
- rakam_systems_vectorstore-0.1.1rc7.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Generated by Django for PgVectorStore
|
|
2
|
+
|
|
3
|
+
from django.db import migrations, models
|
|
4
|
+
import django.db.models.deletion
|
|
5
|
+
import pgvector.django
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Migration(migrations.Migration):
    """Initial schema for the vector store.

    Operations, in order:
      1. Enable the PostgreSQL ``vector`` extension (reverse: drop it).
      2. Create the ``application_collection`` table.
      3. Create the ``application_nodeentry`` table and its two indexes.
    """

    initial = True

    dependencies = []

    operations = [
        # Runs first, ahead of the CreateModel that declares a VectorField.
        migrations.RunSQL(
            "CREATE EXTENSION IF NOT EXISTS vector;",
            reverse_sql="DROP EXTENSION IF EXISTS vector;",
        ),
        migrations.CreateModel(
            name="Collection",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("name", models.CharField(max_length=255, unique=True)),
                ("embedding_dim", models.IntegerField(default=384)),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
            ],
            options={
                "db_table": "application_collection",
            },
        ),
        migrations.CreateModel(
            name="NodeEntry",
            fields=[
                (
                    "node_id",
                    models.AutoField(primary_key=True, serialize=False),
                ),
                ("content", models.TextField()),
                # NOTE(review): this pins the column to 384 dimensions, while
                # NodeEntry in pg_models.py declares
                # VectorField(dimensions=None). Confirm which is intended
                # before changing either side.
                ("embedding", pgvector.django.VectorField(dimensions=384)),
                ("source_file_uuid", models.CharField(max_length=255)),
                ("position", models.IntegerField(blank=True, null=True)),
                (
                    "custom_metadata",
                    models.JSONField(blank=True, default=dict),
                ),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    "collection",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="nodes",
                        to="application.collection",
                    ),
                ),
            ],
            options={
                "db_table": "application_nodeentry",
                "indexes": [
                    # Lookup by source file alone.
                    models.Index(
                        fields=["source_file_uuid"],
                        name="application_source__idx",
                    ),
                    # Lookup by source file within one collection.
                    models.Index(
                        fields=["collection", "source_file_uuid"],
                        name="application_collect_idx",
                    ),
                ],
            },
        ),
    ]
|
|
55
|
+
|
|
File without changes
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Django models for Vector Store.
|
|
3
|
+
|
|
4
|
+
This module re-exports models from pg_models to make them discoverable by Django's
|
|
5
|
+
automatic model discovery mechanism, which looks for a models.py file.
|
|
6
|
+
"""
|
|
7
|
+
from rakam_systems_vectorstore.components.vectorstore.pg_models import Collection # noqa: F401
|
|
8
|
+
from rakam_systems_vectorstore.components.vectorstore.pg_models import NodeEntry # noqa: F401
|
|
9
|
+
|
|
10
|
+
__all__ = ["Collection", "NodeEntry"]
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
from typing import Dict
|
|
3
|
+
from typing import Optional
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from django.db import models
|
|
7
|
+
from pgvector.django import VectorField
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from rakam_systems_vectorstore.core import Node
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Collection(models.Model):
    """
    A named collection of vector embeddings stored in the database.

    The app label is declared explicitly in ``Meta`` to avoid Django app
    registration issues.
    """

    name = models.CharField(unique=True, max_length=255)
    # 384 is only the default dimension; the value is dynamically set based
    # on the embedding model.
    embedding_dim = models.IntegerField(default=384)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "application_collection"
        app_label = "application"

    def __str__(self) -> str:
        """Return a short label containing the collection name."""
        return "Collection: {}".format(self.name)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class NodeEntry(models.Model):
    """
    A stored node: text content, metadata, and its vector embedding.

    Note: The embedding field is declared without a fixed dimension so that
    vectors from different embedding models can be stored without altering
    the model definition.

    NOTE(review): the 0001_initial migration shipped with this package
    declares the same column with ``dimensions=384`` -- confirm that the
    model and the migration are meant to differ.
    """

    collection = models.ForeignKey(
        Collection, on_delete=models.CASCADE, related_name="nodes"
    )
    content = models.TextField()
    # No dimension specified - allows vectors of any dimension
    embedding = VectorField(dimensions=None)

    # Node metadata
    node_id = models.AutoField(primary_key=True)
    source_file_uuid = models.CharField(max_length=255)
    position = models.IntegerField(null=True, blank=True)
    custom_metadata = models.JSONField(default=dict, blank=True)

    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        app_label = "application"
        db_table = "application_nodeentry"
        indexes = [
            models.Index(
                fields=["source_file_uuid"],
                name="application_source__idx",
            ),
            models.Index(
                fields=["collection", "source_file_uuid"],
                name="application_collect_idx",
            ),
        ]

    def __str__(self) -> str:
        """Return ``Node <id>: <preview>`` with at most 30 content characters.

        The trailing ``...`` is added only when the content was actually cut,
        so short content is not shown as if it had been truncated. Content
        that is still ``None`` is rendered as an empty preview instead of
        raising.
        """
        preview_length = 30
        content = self.content or ""
        suffix = "..." if len(content) > preview_length else ""
        return f"Node {self.node_id}: {content[:preview_length]}{suffix}"

    def to_dict(self) -> Dict[str, Any]:
        """Convert the node entry to a dictionary.

        Returns:
            A dict with the keys ``node_id``, ``content``,
            ``source_file_uuid``, ``position`` and ``custom``. The embedding,
            the collection and the timestamps are not included. ``custom``
            holds the ``custom_metadata`` object itself, not a copy.
        """
        return {
            "node_id": self.node_id,
            "content": self.content,
            "source_file_uuid": self.source_file_uuid,
            "position": self.position,
            "custom": self.custom_metadata,
        }

    def to_node(self) -> "Node":
        """Convert the database entry to a Node object.

        Returns:
            A ``Node`` built from this row's content and metadata, with
            ``metadata.node_id`` and ``embedding`` set from the row.
        """
        # Imported here because, at module level, Node is only imported
        # under TYPE_CHECKING.
        from rakam_systems_vectorstore.core import Node, NodeMetadata

        metadata = NodeMetadata(
            source_file_uuid=self.source_file_uuid,
            position=self.position,
            custom=self.custom_metadata,
        )
        metadata.node_id = self.node_id

        node = Node(content=self.content, metadata=metadata)
        # The stored value is passed through unchanged; no conversion is
        # performed here.
        # NOTE(review): presumably the pgvector field already yields a numpy
        # array when loaded from the database -- confirm if callers need a
        # plain list instead.
        node.embedding = self.embedding

        return node
|