rakam-systems-vectorstore 0.1.1rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. rakam_systems_vectorstore/MANIFEST.in +26 -0
  2. rakam_systems_vectorstore/README.md +1071 -0
  3. rakam_systems_vectorstore/__init__.py +93 -0
  4. rakam_systems_vectorstore/components/__init__.py +0 -0
  5. rakam_systems_vectorstore/components/chunker/__init__.py +19 -0
  6. rakam_systems_vectorstore/components/chunker/advanced_chunker.py +1019 -0
  7. rakam_systems_vectorstore/components/chunker/text_chunker.py +154 -0
  8. rakam_systems_vectorstore/components/embedding_model/__init__.py +0 -0
  9. rakam_systems_vectorstore/components/embedding_model/configurable_embeddings.py +546 -0
  10. rakam_systems_vectorstore/components/embedding_model/openai_embeddings.py +259 -0
  11. rakam_systems_vectorstore/components/loader/__init__.py +31 -0
  12. rakam_systems_vectorstore/components/loader/adaptive_loader.py +512 -0
  13. rakam_systems_vectorstore/components/loader/code_loader.py +699 -0
  14. rakam_systems_vectorstore/components/loader/doc_loader.py +812 -0
  15. rakam_systems_vectorstore/components/loader/eml_loader.py +556 -0
  16. rakam_systems_vectorstore/components/loader/html_loader.py +626 -0
  17. rakam_systems_vectorstore/components/loader/md_loader.py +622 -0
  18. rakam_systems_vectorstore/components/loader/odt_loader.py +750 -0
  19. rakam_systems_vectorstore/components/loader/pdf_loader.py +771 -0
  20. rakam_systems_vectorstore/components/loader/pdf_loader_light.py +723 -0
  21. rakam_systems_vectorstore/components/loader/tabular_loader.py +597 -0
  22. rakam_systems_vectorstore/components/vectorstore/__init__.py +0 -0
  23. rakam_systems_vectorstore/components/vectorstore/apps.py +10 -0
  24. rakam_systems_vectorstore/components/vectorstore/configurable_pg_vector_store.py +1661 -0
  25. rakam_systems_vectorstore/components/vectorstore/faiss_vector_store.py +878 -0
  26. rakam_systems_vectorstore/components/vectorstore/migrations/0001_initial.py +55 -0
  27. rakam_systems_vectorstore/components/vectorstore/migrations/__init__.py +0 -0
  28. rakam_systems_vectorstore/components/vectorstore/models.py +10 -0
  29. rakam_systems_vectorstore/components/vectorstore/pg_models.py +97 -0
  30. rakam_systems_vectorstore/components/vectorstore/pg_vector_store.py +827 -0
  31. rakam_systems_vectorstore/config.py +266 -0
  32. rakam_systems_vectorstore/core.py +8 -0
  33. rakam_systems_vectorstore/pyproject.toml +113 -0
  34. rakam_systems_vectorstore/server/README.md +290 -0
  35. rakam_systems_vectorstore/server/__init__.py +20 -0
  36. rakam_systems_vectorstore/server/mcp_server_vector.py +325 -0
  37. rakam_systems_vectorstore/setup.py +103 -0
  38. rakam_systems_vectorstore-0.1.1rc7.dist-info/METADATA +370 -0
  39. rakam_systems_vectorstore-0.1.1rc7.dist-info/RECORD +40 -0
  40. rakam_systems_vectorstore-0.1.1rc7.dist-info/WHEEL +4 -0
@@ -0,0 +1,55 @@
1
+ # Generated by Django for PgVectorStore
2
+
3
+ from django.db import migrations, models
4
+ import django.db.models.deletion
5
+ import pgvector.django
6
+
7
+
8
class Migration(migrations.Migration):
    """Initial schema for the pgvector-backed vector store.

    Operations, in order:

    1. Enable the PostgreSQL ``vector`` extension (the reverse step drops it).
    2. Create the ``Collection`` model in table ``application_collection``.
    3. Create the ``NodeEntry`` model in table ``application_nodeentry``,
       together with its two lookup indexes.
    """

    initial = True

    dependencies = [
    ]

    operations = [
        # The extension must exist before any column of type ``vector`` is created.
        migrations.RunSQL(
            "CREATE EXTENSION IF NOT EXISTS vector;",
            reverse_sql="DROP EXTENSION IF EXISTS vector;"
        ),
        migrations.CreateModel(
            name='Collection',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=255, unique=True)),
                ('embedding_dim', models.IntegerField(default=384)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
            ],
            options={
                'db_table': 'application_collection',
            },
        ),
        migrations.CreateModel(
            name='NodeEntry',
            fields=[
                ('node_id', models.AutoField(primary_key=True, serialize=False)),
                ('content', models.TextField()),
                # Kept in sync with the NodeEntry model, which declares
                # ``VectorField(dimensions=None)``: the column carries no fixed
                # dimension. A hard-coded ``dimensions=384`` here would make the
                # migration state disagree with the model definition.
                # NOTE(review): databases that already applied the earlier form
                # of this migration keep their existing column type - confirm
                # whether a follow-up AlterField migration is required for them.
                ('embedding', pgvector.django.VectorField(dimensions=None)),
                ('source_file_uuid', models.CharField(max_length=255)),
                ('position', models.IntegerField(blank=True, null=True)),
                ('custom_metadata', models.JSONField(blank=True, default=dict)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('updated_at', models.DateTimeField(auto_now=True)),
                ('collection', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='nodes', to='application.collection')),
            ],
            options={
                'db_table': 'application_nodeentry',
                'indexes': [
                    models.Index(fields=['source_file_uuid'], name='application_source__idx'),
                    models.Index(fields=['collection', 'source_file_uuid'], name='application_collect_idx'),
                ],
            },
        ),
    ]
55
+
@@ -0,0 +1,10 @@
1
+ """
2
+ Django models for Vector Store.
3
+
4
+ This module re-exports models from pg_models to make them discoverable by Django's
5
+ automatic model discovery mechanism, which looks for a models.py file.
6
+ """
7
+ from rakam_systems_vectorstore.components.vectorstore.pg_models import Collection # noqa: F401
8
+ from rakam_systems_vectorstore.components.vectorstore.pg_models import NodeEntry # noqa: F401
9
+
10
+ __all__ = ["Collection", "NodeEntry"]
@@ -0,0 +1,97 @@
1
+ from typing import Any
2
+ from typing import Dict
3
+ from typing import Optional
4
+ from typing import TYPE_CHECKING
5
+
6
+ from django.db import models
7
+ from pgvector.django import VectorField
8
+
9
+ if TYPE_CHECKING:
10
+ from rakam_systems_vectorstore.core import Node
11
+
12
+
13
class Collection(models.Model):
    """
    A named collection of vector embeddings persisted in the database.

    ``Meta.app_label`` is set explicitly to avoid Django app registration issues.
    """

    name = models.CharField(unique=True, max_length=255)
    # 384 is only the default; per the original note, the value is set
    # dynamically based on the embedding model.
    embedding_dim = models.IntegerField(default=384)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "application_collection"
        app_label = "application"

    def __str__(self) -> str:
        """Return a short label containing the collection name."""
        return "Collection: {}".format(self.name)
32
+
33
+
34
class NodeEntry(models.Model):
    """
    A persisted node: text content, its vector embedding, and metadata.

    The ``embedding`` field is declared with ``dimensions=None``, i.e. without
    a fixed vector size, so embeddings produced by different models can be
    stored without altering the database schema.
    """

    collection = models.ForeignKey(
        Collection,
        related_name="nodes",
        on_delete=models.CASCADE,
    )
    content = models.TextField()
    # Dimension deliberately left unset so vectors of any size are accepted.
    embedding = VectorField(dimensions=None)

    # Node metadata
    node_id = models.AutoField(primary_key=True)
    source_file_uuid = models.CharField(max_length=255)
    position = models.IntegerField(blank=True, null=True)
    custom_metadata = models.JSONField(blank=True, default=dict)

    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "application_nodeentry"
        app_label = "application"
        indexes = [
            models.Index(
                name="application_source__idx",
                fields=["source_file_uuid"],
            ),
            models.Index(
                name="application_collect_idx",
                fields=["collection", "source_file_uuid"],
            ),
        ]

    def __str__(self) -> str:
        """Return the node id followed by the first 30 characters of the content."""
        preview = self.content[:30]
        return "Node {}: {}...".format(self.node_id, preview)

    def to_dict(self) -> Dict[str, Any]:
        """Return the entry's id, content and metadata as a plain dictionary.

        The embedding is not included; ``custom_metadata`` is exposed under
        the ``"custom"`` key.
        """
        return dict(
            node_id=self.node_id,
            content=self.content,
            source_file_uuid=self.source_file_uuid,
            position=self.position,
            custom=self.custom_metadata,
        )

    def to_node(self) -> "Node":
        """Build a ``Node`` (with ``NodeMetadata``) from this database entry."""
        # Imported locally; at module level ``Node`` is only imported under
        # TYPE_CHECKING.
        from rakam_systems_vectorstore.core import Node
        from rakam_systems_vectorstore.core import NodeMetadata

        node_metadata = NodeMetadata(
            source_file_uuid=self.source_file_uuid,
            position=self.position,
            custom=self.custom_metadata,
        )
        # The id is assigned after construction rather than passed as a keyword.
        node_metadata.node_id = self.node_id

        result = Node(content=self.content, metadata=node_metadata)
        # The stored embedding is attached as-is; no conversion happens here.
        result.embedding = self.embedding
        return result