novastack-core 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. novastack_core-1.0.0/.gitignore +85 -0
  2. novastack_core-1.0.0/PKG-INFO +30 -0
  3. novastack_core-1.0.0/README.md +9 -0
  4. novastack_core-1.0.0/novastack/core/__init__.py +0 -0
  5. novastack_core-1.0.0/novastack/core/bridge/__init__.py +0 -0
  6. novastack_core-1.0.0/novastack/core/bridge/pydantic.py +69 -0
  7. novastack_core-1.0.0/novastack/core/document/__init__.py +3 -0
  8. novastack_core-1.0.0/novastack/core/document/base.py +117 -0
  9. novastack_core-1.0.0/novastack/core/embeddings/__init__.py +8 -0
  10. novastack_core-1.0.0/novastack/core/embeddings/base.py +101 -0
  11. novastack_core-1.0.0/novastack/core/embeddings/enums.py +9 -0
  12. novastack_core-1.0.0/novastack/core/evaluation/__init__.py +9 -0
  13. novastack_core-1.0.0/novastack/core/evaluation/base.py +63 -0
  14. novastack_core-1.0.0/novastack/core/evaluation/context_similarity.py +188 -0
  15. novastack_core-1.0.0/novastack/core/guardrails/__init__.py +4 -0
  16. novastack_core-1.0.0/novastack/core/guardrails/base.py +25 -0
  17. novastack_core-1.0.0/novastack/core/guardrails/types.py +52 -0
  18. novastack_core-1.0.0/novastack/core/llms/__init__.py +11 -0
  19. novastack_core-1.0.0/novastack/core/llms/base.py +46 -0
  20. novastack_core-1.0.0/novastack/core/llms/decorators.py +267 -0
  21. novastack_core-1.0.0/novastack/core/llms/enums.py +8 -0
  22. novastack_core-1.0.0/novastack/core/llms/types.py +67 -0
  23. novastack_core-1.0.0/novastack/core/loaders/__init__.py +4 -0
  24. novastack_core-1.0.0/novastack/core/loaders/base.py +32 -0
  25. novastack_core-1.0.0/novastack/core/loaders/directory.py +140 -0
  26. novastack_core-1.0.0/novastack/core/observability/__init__.py +3 -0
  27. novastack_core-1.0.0/novastack/core/observability/base.py +46 -0
  28. novastack_core-1.0.0/novastack/core/observability/types.py +41 -0
  29. novastack_core-1.0.0/novastack/core/prompts/__init__.py +3 -0
  30. novastack_core-1.0.0/novastack/core/prompts/base.py +96 -0
  31. novastack_core-1.0.0/novastack/core/prompts/utils.py +64 -0
  32. novastack_core-1.0.0/novastack/core/retrievers/__init__.py +3 -0
  33. novastack_core-1.0.0/novastack/core/retrievers/base.py +28 -0
  34. novastack_core-1.0.0/novastack/core/schema.py +15 -0
  35. novastack_core-1.0.0/novastack/core/text_chunkers/__init__.py +11 -0
  36. novastack_core-1.0.0/novastack/core/text_chunkers/base.py +28 -0
  37. novastack_core-1.0.0/novastack/core/text_chunkers/semantic.py +166 -0
  38. novastack_core-1.0.0/novastack/core/text_chunkers/sentence.py +135 -0
  39. novastack_core-1.0.0/novastack/core/text_chunkers/token.py +129 -0
  40. novastack_core-1.0.0/novastack/core/text_chunkers/utils.py +149 -0
  41. novastack_core-1.0.0/novastack/core/tools/__init__.py +5 -0
  42. novastack_core-1.0.0/novastack/core/tools/base.py +89 -0
  43. novastack_core-1.0.0/novastack/core/vector_stores/__init__.py +3 -0
  44. novastack_core-1.0.0/novastack/core/vector_stores/base.py +84 -0
  45. novastack_core-1.0.0/pyproject.toml +39 -0
@@ -0,0 +1,85 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # IDE / OS files
10
+ .DS_Store
11
+ .idea
12
+ .vscode
13
+
14
+ # Distribution / packaging
15
+ .Python
16
+ build/
17
+ develop-eggs/
18
+ dist/
19
+ downloads/
20
+ eggs/
21
+ .eggs/
22
+ lib/
23
+ lib64/
24
+ parts/
25
+ sdist/
26
+ var/
27
+ wheels/
28
+ share/python-wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+
34
+ # PyInstaller
35
+ # Usually these files are written by a python script from a template
36
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
37
+ *.manifest
38
+ *.spec
39
+
40
+ # Installer logs
41
+ pip-log.txt
42
+ pip-delete-this-directory.txt
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ nosetests.xml
52
+ coverage.xml
53
+ *.cover
54
+ *.py,cover
55
+ .hypothesis/
56
+ .pytest_cache/
57
+ cover/
58
+
59
+ # Mkdocs documentation
60
+ docs/_build/
61
+ docs/api_reference/site/
62
+
63
+ # Ruff
64
+ .ruff_cache/
65
+
66
+ # PyBuilder
67
+ .pybuilder/
68
+ target/
69
+
70
+ # Jupyter Notebook
71
+ .ipynb_checkpoints
72
+
73
+ # pyenv
74
+ # For a library or package, you might want to ignore these files since the code is
75
+ # intended to run in multiple environments; otherwise, check them in:
76
+ .python-version
77
+
78
+ # Environments
79
+ .env
80
+ .venv
81
+ env/
82
+ venv/
83
+ ENV/
84
+ env.bak/
85
+ venv.bak/
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.4
2
+ Name: novastack-core
3
+ Version: 1.0.0
4
+ Summary: Forge data pipelines, unleash smart AI applications.
5
+ Project-URL: Repository, https://github.com/novastack-project/novastack
6
+ Author-email: Leonardo Furnielis <leonardofurnielis@outlook.com>
7
+ License: Apache-2.0
8
+ Keywords: AI,LLM,QA,RAG,data,observability,retrieval,semantic-search
9
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
10
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
11
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
12
+ Requires-Python: <3.14,>=3.11
13
+ Requires-Dist: deprecated<2.0.0,>=1.3.1
14
+ Requires-Dist: nltk<4.0.0,>=3.9.2
15
+ Requires-Dist: numpy<2.0.0,>=1.26.4
16
+ Requires-Dist: pydantic<3.0.0,>=2.12.5
17
+ Requires-Dist: tiktoken<0.13.0,>=0.12.0
18
+ Provides-Extra: dev
19
+ Requires-Dist: ruff>=0.15.8; extra == 'dev'
20
+ Description-Content-Type: text/markdown
21
+
22
+ # NovaStack core
23
+
24
+ Core Python package providing essential abstractions and interfaces for building LLM applications, with a focus on Retrieval-Augmented Generation (RAG).
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ pip install novastack-core
30
+ ```
@@ -0,0 +1,9 @@
1
+ # NovaStack core
2
+
3
+ Core Python package providing essential abstractions and interfaces for building LLM applications, with a focus on Retrieval-Augmented Generation (RAG).
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install novastack-core
9
+ ```
File without changes
File without changes
@@ -0,0 +1,69 @@
1
+ import pydantic
2
+ from pydantic import (
3
+ AnyUrl,
4
+ BaseModel,
5
+ BeforeValidator,
6
+ ConfigDict,
7
+ Field,
8
+ FilePath,
9
+ GetCoreSchemaHandler,
10
+ GetJsonSchemaHandler,
11
+ PlainSerializer,
12
+ PrivateAttr,
13
+ Secret,
14
+ SecretStr,
15
+ SerializationInfo,
16
+ SerializeAsAny,
17
+ SerializerFunctionWrapHandler,
18
+ StrictFloat,
19
+ StrictInt,
20
+ StrictStr,
21
+ TypeAdapter,
22
+ ValidationError,
23
+ ValidationInfo,
24
+ WithJsonSchema,
25
+ WrapSerializer,
26
+ computed_field,
27
+ create_model,
28
+ field_serializer,
29
+ field_validator,
30
+ model_serializer,
31
+ model_validator,
32
+ )
33
+ from pydantic.fields import FieldInfo
34
+ from pydantic.json_schema import JsonSchemaValue
35
+
36
+ __all__ = [
37
+ "pydantic",
38
+ "BaseModel",
39
+ "ConfigDict",
40
+ "GetJsonSchemaHandler",
41
+ "GetCoreSchemaHandler",
42
+ "Field",
43
+ "PlainSerializer",
44
+ "PrivateAttr",
45
+ "model_validator",
46
+ "field_validator",
47
+ "computed_field",
48
+ "create_model",
49
+ "StrictFloat",
50
+ "StrictInt",
51
+ "StrictStr",
52
+ "FieldInfo",
53
+ "ValidationInfo",
54
+ "TypeAdapter",
55
+ "ValidationError",
56
+ "WithJsonSchema",
57
+ "BeforeValidator",
58
+ "JsonSchemaValue",
59
+ "SerializeAsAny",
60
+ "WrapSerializer",
61
+ "field_serializer",
62
+ "Secret",
63
+ "SecretStr",
64
+ "model_serializer",
65
+ "AnyUrl",
66
+ "FilePath",
67
+ "SerializationInfo",
68
+ "SerializerFunctionWrapHandler",
69
+ ]
@@ -0,0 +1,3 @@
1
+ from novastack.core.document.base import BaseDocument, Document, DocumentWithScore
2
+
3
+ __all__ = ["BaseDocument", "Document", "DocumentWithScore"]
@@ -0,0 +1,117 @@
1
+ import uuid
2
+ from abc import ABC, abstractmethod
3
+ from hashlib import sha256
4
+ from typing import Any
5
+
6
+ import numpy as np
7
+ from novastack.core.bridge.pydantic import (
8
+ BaseModel,
9
+ ConfigDict,
10
+ Field,
11
+ computed_field,
12
+ field_validator,
13
+ )
14
+
15
+
16
class BaseDocument(BaseModel, ABC):
    """Abstract base class defining the interface for retrievable documents.

    Concrete subclasses must implement ``get_content()`` and the ``hash``
    property.
    """

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        validate_assignment=True,
    )

    id_: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique Id of the document.",
    )
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="A flat dictionary of metadata fields.",
    )
    embedding: list[float] | np.ndarray | None = Field(
        default=None,
        description="Embedding of the document.",
    )

    @field_validator("metadata", mode="before")
    @classmethod
    def _validate_metadata(cls, v: Any) -> dict:
        """Replace a ``None`` metadata value with an empty dict.

        Runs before field validation; any other value is passed through
        unchanged so the regular ``dict[str, Any]`` validation applies to it.
        """
        if v is None:
            return {}
        return v

    @abstractmethod
    def get_content(self) -> str:
        """Get document content."""
        raise NotImplementedError(
            f"{self.__class__.__name__} must implement the get_content() method"
        )

    @property
    @abstractmethod
    def hash(self) -> str:
        """Get document hash."""
        # The message names the member that is actually missing (``hash``),
        # not ``get_content()``.
        raise NotImplementedError(
            f"{self.__class__.__name__} must implement the hash property"
        )
59
+
60
+
61
class Document(BaseDocument):
    """Concrete document whose content is a plain text string."""

    text: str = Field(default="", description="Text content of the document.")

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier string for this class."""
        return "Document"

    def get_content(self) -> str:
        """Return the stored text."""
        return self.text

    @computed_field
    @property
    def hash(self) -> str:
        """Return the SHA-256 hex digest of the document text."""
        # "surrogatepass" lets lone surrogate code points be encoded instead
        # of raising UnicodeEncodeError.
        encoded = str(self.text).encode("utf-8", "surrogatepass")
        digest = sha256(encoded)
        return digest.hexdigest()
79
+
80
+
81
class DocumentWithScore(BaseModel):
    """Pairs a document with an optional relevance score."""

    model_config = ConfigDict(
        validate_assignment=True,
        arbitrary_types_allowed=True,
    )

    document: BaseDocument
    score: float | None = Field(
        default=None,
        description="Relevance score for the document.",
    )

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier string for this class."""
        return "DocumentWithScore"

    @property
    def normalized_score(self) -> float:
        """Return the score, substituting 0.0 when no score is set."""
        if self.score is None:
            return 0.0
        return self.score

    # ---- Convenience accessors delegating to the wrapped document ----
    @property
    def id_(self) -> str:
        """Return the wrapped document's Id."""
        wrapped = self.document
        return wrapped.id_

    @property
    def metadata(self) -> dict:
        """Return the wrapped document's metadata."""
        wrapped = self.document
        return wrapped.metadata

    def get_content(self) -> str:
        """Return the wrapped document's content."""
        wrapped = self.document
        return wrapped.get_content()
@@ -0,0 +1,8 @@
1
+ from novastack.core.embeddings.base import BaseEmbedding, Embedding
2
+ from novastack.core.embeddings.enums import SimilarityMode
3
+
4
+ __all__ = [
5
+ "BaseEmbedding",
6
+ "Embedding",
7
+ "SimilarityMode",
8
+ ]
@@ -0,0 +1,101 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ import numpy as np
4
+ from novastack.core.bridge.pydantic import BaseModel, ConfigDict, Field
5
+ from novastack.core.document import Document
6
+ from novastack.core.embeddings.enums import SimilarityMode
7
+ from novastack.core.schema import TransformerComponent
8
+
9
+ Embedding = list[float]
10
+
11
+
12
def similarity(
    embedding1: Embedding,
    embedding2: Embedding,
    mode: SimilarityMode = SimilarityMode.COSINE,
) -> float:
    """
    Calculate similarity between two embeddings.

    Args:
        embedding1: First embedding vector
        embedding2: Second embedding vector
        mode: Similarity calculation mode (cosine, dot_product, or euclidean).
            Any mode other than euclidean or dot_product is treated as cosine.

    Returns:
        The similarity as a float. Euclidean mode returns the negated
        distance. Cosine mode returns 0.0 when either vector has zero
        magnitude, since the cosine is undefined there.

    Raises:
        ValueError: If either embedding is empty or their lengths differ.
    """
    # len() rather than truthiness: the inputs may be numpy arrays, whose
    # truth value is ambiguous.
    if len(embedding1) == 0 or len(embedding2) == 0:
        raise ValueError("Embeddings cannot be empty")

    if len(embedding1) != len(embedding2):
        raise ValueError(
            f"Embeddings must have same dimension. "
            f"Got {len(embedding1)} and {len(embedding2)}"
        )

    # asarray avoids copying inputs that are already arrays.
    x = np.asarray(embedding1)
    y = np.asarray(embedding2)

    if mode == SimilarityMode.EUCLIDEAN:
        return -float(np.linalg.norm(x - y))

    if mode == SimilarityMode.DOT_PRODUCT:
        return float(np.dot(x, y))

    # Cosine similarity calculation
    norm = np.linalg.norm(x) * np.linalg.norm(y)
    if norm == 0.0:
        # Avoid 0/0, which would produce NaN and a RuntimeWarning.
        return 0.0
    return float(np.dot(x, y) / norm)
49
+
50
+
51
class BaseEmbedding(BaseModel, TransformerComponent, ABC):
    """
    Abstract base class defining the interface for embedding models.

    Subclasses implement ``embed_text``; ``embed_documents`` and ``__call__``
    are built on top of it.
    """

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        validate_assignment=True,
    )

    model_name: str = Field(..., description="Name of the embedding model")

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier string for this class."""
        return "BaseEmbedding"

    @staticmethod
    def similarity(
        embedding1: Embedding,
        embedding2: Embedding,
        mode: SimilarityMode = SimilarityMode.COSINE,
    ) -> float:
        """Get embedding similarity.

        Delegates to the module-level ``similarity`` function.
        """
        return similarity(embedding1, embedding2, mode)

    @abstractmethod
    def embed_text(self, input: str | list[str]) -> list[Embedding]:
        """
        Embed one or more text strings.

        Args:
            input: Single text string or list of text strings to embed
        """

    def embed_documents(self, documents: list[Document]) -> list[Document]:
        """
        Embed a list of documents and assign the computed embeddings to the 'embedding' attribute.

        Args:
            documents (list[Document]): List of documents to compute embeddings.

        Returns:
            The same list, with each document's ``embedding`` set in place.

        Raises:
            ValueError: If ``embed_text`` returns a different number of
                embeddings than there are documents.
        """
        # Nothing to embed: skip the call to the embedding backend.
        if not documents:
            return documents

        texts = [document.get_content() for document in documents]
        embeddings = list(self.embed_text(texts))

        # Check up front so a mismatch neither truncates silently nor leaves
        # only part of the documents updated.
        if len(embeddings) != len(documents):
            raise ValueError(
                f"embed_text returned {len(embeddings)} embeddings "
                f"for {len(documents)} documents"
            )

        for document, embedding in zip(documents, embeddings):
            document.embedding = embedding

        return documents

    def __call__(self, documents: list[Document]) -> list[Document]:
        """Embed ``documents``; equivalent to ``embed_documents(documents)``."""
        return self.embed_documents(documents)
@@ -0,0 +1,9 @@
1
+ from enum import Enum
2
+
3
+
4
class SimilarityMode(str, Enum):
    """String-valued choices for how two embeddings are compared."""

    # Members are also ``str`` instances, so each compares equal to its value.
    COSINE = "cosine"
    DOT_PRODUCT = "dot_product"
    EUCLIDEAN = "euclidean"
@@ -0,0 +1,9 @@
1
+ from novastack.core.evaluation.base import BaseEvaluator
2
+ from novastack.core.evaluation.context_similarity import (
3
+ ContextSimilarityEvaluator,
4
+ )
5
+
6
+ __all__ = [
7
+ "BaseEvaluator",
8
+ "ContextSimilarityEvaluator",
9
+ ]
@@ -0,0 +1,63 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any
3
+
4
+ from novastack.core.bridge.pydantic import BaseModel, ConfigDict, Field, field_validator
5
+
6
+
7
class BaseEvaluator(BaseModel, ABC):
    """
    Abstract contract shared by evaluation metrics.

    Concrete evaluators subclass this and override ``evaluate``.
    """

    model_config = ConfigDict(
        validate_assignment=True,
        arbitrary_types_allowed=True,
    )

    score_threshold: float = Field(
        default=0.8,
        description="Minimum required score for evaluation approval",
        ge=0.0,
        le=1.0,
    )

    @field_validator("score_threshold")
    @classmethod
    def _validate_threshold(cls, v: float) -> float:
        """Reject thresholds that fall outside the closed interval [0.0, 1.0]."""
        within_bounds = 0.0 <= v <= 1.0
        if within_bounds:
            return v
        raise ValueError(f"score_threshold must be between 0.0 and 1.0, got: {v}")

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier string for this class."""
        return "BaseEvaluator"

    @abstractmethod
    def evaluate(
        self,
        query: str | None = None,
        generated_text: str | None = None,
        contexts: list[str] | None = None,
        **kwargs: Any,
    ) -> dict:
        """
        Run the evaluation and report its outcome.

        Every concrete evaluator overrides this method; which of the
        optional inputs are actually used depends on the evaluation type.

        Returns:
            dict: Evaluation results. Expected to contain at least:
                - score (float): The evaluation score
                - passing (bool): Whether the evaluation passed a threshold
            Extra keys may carry evaluator-specific details.

        Raises:
            NotImplementedError: When this base implementation is invoked
                directly instead of an override.
        """
        message = f"{self.__class__.__name__} must implement the evaluate() method"
        raise NotImplementedError(message)
+ )