levelapp 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. levelapp/__init__.py +0 -0
  2. levelapp/aspects/__init__.py +8 -0
  3. levelapp/aspects/loader.py +253 -0
  4. levelapp/aspects/logger.py +59 -0
  5. levelapp/aspects/monitor.py +617 -0
  6. levelapp/aspects/sanitizer.py +168 -0
  7. levelapp/clients/__init__.py +122 -0
  8. levelapp/clients/anthropic.py +112 -0
  9. levelapp/clients/gemini.py +130 -0
  10. levelapp/clients/groq.py +101 -0
  11. levelapp/clients/huggingface.py +162 -0
  12. levelapp/clients/ionos.py +126 -0
  13. levelapp/clients/mistral.py +106 -0
  14. levelapp/clients/openai.py +116 -0
  15. levelapp/comparator/__init__.py +5 -0
  16. levelapp/comparator/comparator.py +232 -0
  17. levelapp/comparator/extractor.py +108 -0
  18. levelapp/comparator/schemas.py +61 -0
  19. levelapp/comparator/scorer.py +269 -0
  20. levelapp/comparator/utils.py +136 -0
  21. levelapp/config/__init__.py +5 -0
  22. levelapp/config/endpoint.py +199 -0
  23. levelapp/config/prompts.py +57 -0
  24. levelapp/core/__init__.py +0 -0
  25. levelapp/core/base.py +386 -0
  26. levelapp/core/schemas.py +24 -0
  27. levelapp/core/session.py +336 -0
  28. levelapp/endpoint/__init__.py +0 -0
  29. levelapp/endpoint/client.py +188 -0
  30. levelapp/endpoint/client_test.py +41 -0
  31. levelapp/endpoint/manager.py +114 -0
  32. levelapp/endpoint/parsers.py +119 -0
  33. levelapp/endpoint/schemas.py +38 -0
  34. levelapp/endpoint/tester.py +52 -0
  35. levelapp/evaluator/__init__.py +3 -0
  36. levelapp/evaluator/evaluator.py +307 -0
  37. levelapp/metrics/__init__.py +63 -0
  38. levelapp/metrics/embedding.py +56 -0
  39. levelapp/metrics/embeddings/__init__.py +0 -0
  40. levelapp/metrics/embeddings/sentence_transformer.py +30 -0
  41. levelapp/metrics/embeddings/torch_based.py +56 -0
  42. levelapp/metrics/exact.py +182 -0
  43. levelapp/metrics/fuzzy.py +80 -0
  44. levelapp/metrics/token.py +103 -0
  45. levelapp/plugins/__init__.py +0 -0
  46. levelapp/repository/__init__.py +3 -0
  47. levelapp/repository/filesystem.py +203 -0
  48. levelapp/repository/firestore.py +291 -0
  49. levelapp/simulator/__init__.py +3 -0
  50. levelapp/simulator/schemas.py +116 -0
  51. levelapp/simulator/simulator.py +531 -0
  52. levelapp/simulator/utils.py +134 -0
  53. levelapp/visualization/__init__.py +7 -0
  54. levelapp/visualization/charts.py +358 -0
  55. levelapp/visualization/dashboard.py +240 -0
  56. levelapp/visualization/exporter.py +167 -0
  57. levelapp/visualization/templates/base.html +158 -0
  58. levelapp/visualization/templates/comparator_dashboard.html +57 -0
  59. levelapp/visualization/templates/simulator_dashboard.html +111 -0
  60. levelapp/workflow/__init__.py +6 -0
  61. levelapp/workflow/base.py +192 -0
  62. levelapp/workflow/config.py +96 -0
  63. levelapp/workflow/context.py +64 -0
  64. levelapp/workflow/factory.py +42 -0
  65. levelapp/workflow/registration.py +6 -0
  66. levelapp/workflow/runtime.py +19 -0
  67. levelapp-0.1.15.dist-info/METADATA +571 -0
  68. levelapp-0.1.15.dist-info/RECORD +70 -0
  69. levelapp-0.1.15.dist-info/WHEEL +4 -0
  70. levelapp-0.1.15.dist-info/licenses/LICENSE +0 -0
@@ -0,0 +1,56 @@
+ """levelapp/metrics/embeddings/torch_based.py"""
+ import torch
+
+ from typing import Any, Dict
+ from transformers import AutoTokenizer, AutoModel
+
+ from levelapp.core.base import BaseMetric
+
+
+ class TorchEmbeddingMetric(BaseMetric):
+     """Embedding similarity using a Transformer model (mean-pooled embeddings)."""
+     def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2", **kwargs):
+         super().__init__(processor=kwargs.get("processor"), score_cutoff=kwargs.get("score_cutoff"))
+         self.model_name = model_name
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+         # Load tokenizer and model eagerly at construction time
+         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+         self.model = AutoModel.from_pretrained(model_name).to(self.device)
+
+     @torch.no_grad()
+     def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+         self._validate_inputs(generated=generated, reference=reference)
+
+         encoded_input = self.tokenizer(
+             [reference, generated],
+             padding=True,
+             truncation=True,
+             return_tensors="pt"
+         ).to(self.device)
+         model_output = self.model(**encoded_input)
+
+         # Mean pooling
+         embeddings = self._mean_pooling(model_output, encoded_input["attention_mask"])
+         embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=-1)
+
+         # Cosine similarity
+         similarity = torch.nn.functional.cosine_similarity(embeddings[0], embeddings[1], dim=0).item()
+
+         return {
+             "similarity": similarity,
+             "metadata": self._build_metadata(
+                 backend="torch",
+                 model=self.model_name,
+                 device=str(self.device),
+             )
+         }
+
+     @staticmethod
+     def _mean_pooling(model_output, attention_mask):
+         token_embeddings = model_output[0]
+         input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+         sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
+         sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+
+         return sum_embeddings / sum_mask
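
A minimal usage sketch for the embedding metric above (illustrative only, not part of the package: it assumes the default model can be downloaded and that compute() is called directly):

from levelapp.metrics.embeddings.torch_based import TorchEmbeddingMetric

# Tokenizer and model are loaded at construction; CUDA is used when available.
metric = TorchEmbeddingMetric()
result = metric.compute(
    generated="The invoice was sent on Monday.",
    reference="The invoice went out on Monday.",
)
print(result["similarity"])   # cosine similarity of the two mean-pooled, L2-normalized embeddings
print(result["metadata"])     # backend/model/device, as passed to _build_metadata above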
@@ -0,0 +1,182 @@
+ """levelapp/metrics/exact.py"""
+ from typing import Dict, Any
+
+ from rapidfuzz import distance
+
+ from levelapp.core.base import BaseMetric
+ from levelapp.aspects.monitor import MonitoringAspect, MetricType
+
+
+ class ExactMatch(BaseMetric):
+     """Binary exact match comparison (1.0 for exact match, 0.0 otherwise)"""
+
+     @MonitoringAspect.monitor(name="exact_match", category=MetricType.SCORING, cached=True, enable_timing=True)
+     def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+         """
+         Compute the exact match score between generated and reference strings.
+
+         Args:
+             generated (str): The text generated by the agent.
+             reference (str): The expected reference text.
+
+         Returns:
+             Dict[str, Any]: A dictionary containing the exact match score and metadata.
+         """
+         self._validate_inputs(generated=generated, reference=reference)
+
+         score = distance.Levenshtein.normalized_similarity(
+             s1=generated,
+             s2=reference,
+             processor=self.processor,
+             score_cutoff=1.0
+         )
+
+         return {
+             "score": score,
+             "metadata": self._build_metadata(
+                 generated_length=len(generated),
+                 reference_length=len(reference)
+             )
+         }
+
+
+ class Levenshtein(BaseMetric):
+     """Levenshtein edit distance (number of insertions, deletions, substitutions)"""
+
+     @MonitoringAspect.monitor(name="levenshtein", category=MetricType.SCORING, cached=True, enable_timing=True)
+     def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+         """
+         Compute the Levenshtein distance score between generated and reference strings.
+
+         Args:
+             generated (str): The text generated by the agent.
+             reference (str): The expected reference text.
+
+         Returns:
+             Dict[str, Any]: A dictionary containing the Levenshtein score and metadata.
+         """
+         self._validate_inputs(generated=generated, reference=reference)
+
+         score = distance.Levenshtein.normalized_similarity(
+             s1=generated,
+             s2=reference,
+             processor=self.processor,
+             score_cutoff=self.score_cutoff or 1.0
+         )
+
+         return {
+             "score": score,
+             "metadata": self._build_metadata(
+                 generated_length=len(generated),
+                 reference_length=len(reference)
+             )
+         }
+
+
+ class JaroWinkler(BaseMetric):
+     """Jaro-Winkler similarity (string similarity measure)"""
+
+     @MonitoringAspect.monitor(name="jaro-winkler", category=MetricType.SCORING, cached=True, enable_timing=True)
+     def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+         """
+         Compute the Jaro-Winkler similarity score between generated and reference strings.
+
+         Args:
+             generated (str): The text generated by the agent.
+             reference (str): The expected reference text.
+
+         Returns:
+             Dict[str, Any]: A dictionary containing the Jaro-Winkler score and metadata.
+         """
+         self._validate_inputs(generated=generated, reference=reference)
+
+         score = distance.JaroWinkler.normalized_similarity(
+             s1=generated,
+             s2=reference,
+             processor=self.processor,
+             score_cutoff=self.score_cutoff
+         )
+
+         return {
+             "score": score,
+             "metadata": self._build_metadata(
+                 generated_length=len(generated),
+                 reference_length=len(reference)
+             )
+         }
+
+
+ class Hamming(BaseMetric):
+     """Hamming distance (character substitutions only, for equal-length strings)"""
+
+     @MonitoringAspect.monitor(name="hamming", category=MetricType.SCORING, cached=True, enable_timing=True)
+     def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+         """
+         Compute the Hamming distance score between generated and reference strings.
+
+         Args:
+             generated (str): The text generated by the agent.
+             reference (str): The expected reference text.
+
+         Returns:
+             Dict[str, Any]: A dictionary containing the Hamming score and metadata.
+         """
+         self._validate_inputs(generated=generated, reference=reference)
+
+         score = distance.Hamming.normalized_similarity(
+             s1=generated,
+             s2=reference,
+             processor=self.processor,
+             score_cutoff=self.score_cutoff
+         )
+
+         return {
+             "score": score,
+             "metadata": self._build_metadata(
+                 generated_length=len(generated),
+                 reference_length=len(reference)
+             )
+         }
+
+
+ class PrefixMatch(BaseMetric):
+     """Prefix similarity (normalized length of the common prefix)"""
+
+     @MonitoringAspect.monitor(name="prefix-match", category=MetricType.SCORING, cached=True, enable_timing=True)
+     def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+         """
+         Compute the prefix similarity score between generated and reference strings.
+
+         Args:
+             generated (str): The text generated by the agent.
+             reference (str): The expected reference text.
+
+         Returns:
+             Dict[str, Any]: A dictionary containing the prefix similarity and metadata.
+         """
+         self._validate_inputs(generated=generated, reference=reference)
+
+         score = distance.Prefix.normalized_similarity(
+             s1=generated,
+             s2=reference,
+             processor=self.processor,
+             score_cutoff=self.score_cutoff
+         )
+
+         return {
+             "score": score,
+             "metadata": self._build_metadata(
+                 generated_length=len(generated),
+                 reference_length=len(reference)
+             )
+         }
+
+
+ # Registry of all exact metrics
+ EXACT_METRICS = {
+     "exact_match": ExactMatch,
+     "levenshtein": Levenshtein,
+     "jaro_winkler": JaroWinkler,
+     "hamming": Hamming,
+     "prefix_match": PrefixMatch
+ }
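
The EXACT_METRICS mapping makes the exact-match family discoverable by name. A small sketch of how a caller might consume it, assuming the metric classes can be constructed with no arguments (so processor and score_cutoff take whatever defaults BaseMetric provides):

from levelapp.metrics.exact import EXACT_METRICS

generated = "Order #123 has shipped"
reference = "Order #123 shipped"

# Run every registered exact-family metric on the same pair of strings.
for name, metric_cls in EXACT_METRICS.items():
    metric = metric_cls()
    result = metric.compute(generated=generated, reference=reference)
    print(f"{name}: {result['score']:.3f}")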
@@ -0,0 +1,80 @@
+ """levelapp/metrics/fuzzy.py"""
+ from rapidfuzz import fuzz
+
+ from typing import Dict, Any
+
+ from levelapp.core.base import BaseMetric
+ from levelapp.aspects.monitor import MonitoringAspect, MetricType
+
+
+ class FuzzyRatio(BaseMetric):
+     """A metric that computes the fuzzy ratio between two texts."""
+
+     @MonitoringAspect.monitor(name="fuzzy-ratio", category=MetricType.API_CALL, cached=True, enable_timing=True)
+     def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+         """
+         Compute the fuzzy ratio between the generated text and the reference text.
+
+         Args:
+             generated (str): The text generated by the agent.
+             reference (str): The expected reference text.
+
+         Returns:
+             Dict[str, Any]: A dictionary containing the fuzzy ratio score and metadata.
+         """
+         score = fuzz.ratio(
+             s1=generated,
+             s2=reference,
+             processor=self.processor,
+             score_cutoff=self.score_cutoff
+         )
+
+         # TODO-0: Return results as Pydantic model.
+         return {
+             "score": score / 100,
+             "metadata": self._build_metadata(
+                 generated_length=len(generated),
+                 reference_length=len(reference)
+             )
+         }
+
+
+ class PartialRatio(BaseMetric):
+     """
+     A metric that computes the partial fuzzy ratio between two texts.
+     This is useful for evaluating how similar two pieces of text are,
+     allowing for partial matches.
+     """
+
+     @MonitoringAspect.monitor(name="partial-ratio", category=MetricType.SCORING, cached=True, enable_timing=True)
+     def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+         """
+         Compute the partial fuzzy ratio between the generated text and the reference text.
+
+         Args:
+             generated (str): The text generated by the agent.
+             reference (str): The expected reference text.
+
+         Returns:
+             Dict[str, Any]: A dictionary containing the partial fuzzy ratio.
+         """
+         score = fuzz.partial_ratio(
+             s1=generated,
+             s2=reference,
+             processor=self.processor,
+             score_cutoff=self.score_cutoff
+         )
+
+         return {
+             "score": score / 100,
+             "metadata": self._build_metadata(
+                 generated_length=len(generated),
+                 reference_length=len(reference)
+             )
+         }
+
+
+ FUZZY_METRICS = {
+     "fuzzy_ratio": FuzzyRatio,
+     "partial_ratio": PartialRatio,
+ }
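
Both classes divide the raw rapidfuzz score (0-100) by 100, so callers receive values in [0, 1]. The behavioural difference between the two underlying functions, shown with rapidfuzz directly on illustrative strings:

from rapidfuzz import fuzz

# "on Friday" is a contiguous substring of the longer text, so partial_ratio scores 100,
# while the plain ratio is pulled down by all the unmatched surrounding characters.
print(fuzz.ratio("on Friday", "The delivery is scheduled on Friday"))
print(fuzz.partial_ratio("on Friday", "The delivery is scheduled on Friday"))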
@@ -0,0 +1,103 @@
+ """levelapp/metrics/token.py"""
+ from rapidfuzz import fuzz
+
+ from typing import Dict, Any
+
+ from levelapp.core.base import BaseMetric
+ from levelapp.aspects.monitor import MonitoringAspect, MetricType
+
+
+ class WeightedRatio(BaseMetric):
+     """A metric that calculates a weighted ratio based on the other ratio algorithms"""
+
+     @MonitoringAspect.monitor(name="weighted-ratio", category=MetricType.SCORING, cached=True, enable_timing=True)
+     def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+         """
+         Compute the token-based metric between the generated text and the reference text.
+
+         Args:
+             generated (str): The text generated by the agent.
+             reference (str): The expected reference text.
+
+         Returns:
+             Dict[str, Any]: A dictionary containing the score and metadata.
+         """
+         score = fuzz.WRatio(
+             s1=generated,
+             s2=reference,
+             processor=self.processor,
+             score_cutoff=self.score_cutoff
+         )
+
+         return {
+             "score": score / 100,
+             "metadata": self._build_metadata(
+                 generated_length=len(generated),
+                 reference_length=len(reference)
+             )
+         }
+
+
+ class TokenSetRatio(BaseMetric):
+     """
+     A metric that compares the words in the strings based
+     on unique and common words between them using fuzz.ratio.
+     """
+
+     @MonitoringAspect.monitor(name="token-set-ratio", category=MetricType.SCORING, cached=True, enable_timing=True)
+     def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+         """
+         Compute the token-based metric between the generated text and the reference text.
+
+         Args:
+             generated (str): The text generated by the agent.
+             reference (str): The expected reference text.
+
+         Returns:
+             Dict[str, Any]: A dictionary containing the score and metadata.
+         """
+         score = fuzz.token_set_ratio(
+             s1=generated,
+             s2=reference,
+             processor=self.processor,
+             score_cutoff=self.score_cutoff
+         )
+
+         return {
+             "score": score / 100,
+             "metadata": self._build_metadata(
+                 generated_length=len(generated),
+                 reference_length=len(reference)
+             )
+         }
+
+
+ class TokenSortRatio(BaseMetric):
+     """A metric that sorts the words in the strings and calculates the fuzz.ratio between them."""
+
+     @MonitoringAspect.monitor(name="token-sort-ratio", category=MetricType.SCORING, cached=True, enable_timing=True)
+     def compute(self, generated: str, reference: str) -> Dict[str, Any]:
+         """
+         Compute the token-based metric between the generated text and the reference text.
+
+         Args:
+             generated (str): The text generated by the agent.
+             reference (str): The expected reference text.
+
+         Returns:
+             Dict[str, Any]: A dictionary containing the score and metadata.
+         """
+         score = fuzz.token_sort_ratio(
+             s1=generated,
+             s2=reference,
+             processor=self.processor,
+             score_cutoff=self.score_cutoff
+         )
+
+         return {
+             "score": score / 100,
+             "metadata": self._build_metadata(
+                 generated_length=len(generated),
+                 reference_length=len(reference)
+             )
+         }
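
As in fuzzy.py, the raw rapidfuzz scores (0-100) are divided by 100 before being returned. The difference between the sort- and set-based variants, illustrated with rapidfuzz directly:

from rapidfuzz import fuzz

a = "fuzzy wuzzy was a bear"
b = "wuzzy fuzzy was a bear"
c = "fuzzy was a bear"

# token_sort_ratio sorts the words first, so pure word reordering still scores 100.
print(fuzz.token_sort_ratio(a, b))   # 100.0
# token_set_ratio also compares against just the common token set,
# so extra words in one string need not lower the score.
print(fuzz.token_set_ratio(a, c))    # 100.0
print(fuzz.token_sort_ratio(a, c))   # below 100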
File without changes
@@ -0,0 +1,3 @@
+ from .firestore import FirestoreRepository
+
+ __all__ = ['FirestoreRepository']
@@ -0,0 +1,203 @@
+ import json
+ from pathlib import Path
+
+ from typing import List, Dict, Any, Type, TYPE_CHECKING
+
+ from pydantic import ValidationError
+
+ from levelapp.core.base import BaseRepository, Model
+ from levelapp.aspects import logger
+
+ if TYPE_CHECKING:
+     from levelapp.workflow.config import WorkflowConfig
+
+
+ class FileSystemRepository(BaseRepository):
+     """
+     File-system implementation of BaseRepository.
+     Persists Pydantic model data as JSON files under the configured base path.
+     """
+     def __init__(self, config: "WorkflowConfig | None" = None):
+         self._CLASS_NAME = self.__class__.__name__
+
+         self.config = config
+         base_path = getattr(config.repository, "base_path", "./data") if config else "./data"
+         self.base_path = Path(base_path).resolve()
+         self.base_path.mkdir(parents=True, exist_ok=True)
+         logger.info(f"[{self.__class__.__name__}] Base path: {base_path}")
+
+     def connect(self) -> None:
+         """Ensure the base path exists (no connection needed for local storage)."""
+         if not self.base_path.exists():
+             self.base_path.mkdir(parents=True, exist_ok=True)
+         logger.info(f"[{self._CLASS_NAME}] Connected to {self.base_path}")
+
+     def close(self) -> None:
+         """No-op for local storage."""
+         logger.info(f"[{self._CLASS_NAME}] Closed (no active connections)")
+
+     def _compose_path(
+         self,
+         collection_id: str,
+         section_id: str,
+         sub_collection_id: str,
+         document_id: str,
+     ) -> Path:
+         """
+         Compose the hierarchical path for a document.
+
+         Args:
+             collection_id (str): the ID for the whole collection.
+             section_id (str): the ID for the section.
+             sub_collection_id (str): the ID for the sub-collection.
+             document_id (str): the ID for the document.
+
+         Returns:
+             Path: the composed path.
+         """
+         path = self.base_path / collection_id / section_id / sub_collection_id
+         path.mkdir(parents=True, exist_ok=True)
+         return path / f"{document_id}.json"
+
+     def retrieve_document(
+         self,
+         collection_id: str,
+         section_id: str,
+         sub_collection_id: str,
+         document_id: str,
+         model_type: Type[Model]
+     ) -> Model | None:
+         """
+         Retrieve a document from the local JSON file system.
+
+         Args:
+             collection_id (str): the ID for the whole collection.
+             section_id (str): the ID for the section.
+             sub_collection_id (str): the ID for the sub-collection.
+             document_id (str): the ID for the document.
+             model_type (Type[Model]): Pydantic model for parsing.
+
+         Returns:
+             Model | None: An instance of the provided model.
+         """
+         path = self._compose_path(collection_id, section_id, sub_collection_id, document_id)
+         if not path.exists():
+             logger.warning(f"[{self._CLASS_NAME}] Document '{path}' not found")
+             return None
+
+         try:
+             with path.open("r", encoding="utf-8") as f:
+                 data = json.load(f)
+
+             return model_type.model_validate(data)
+
+         except json.JSONDecodeError as e:
+             logger.error(f"[{self._CLASS_NAME}] Failed to load the JSON file '{document_id}':\n{e}")
+             return None
+
+         except ValidationError as e:
+             logger.error(f"[{self._CLASS_NAME}] Failed to instantiate a Pydantic model for file '{document_id}':\n{e}")
+             return None
+
+         except Exception as e:
+             logger.exception(f"[{self._CLASS_NAME}] Unexpected error retrieving file '{document_id}':\n{e}")
+             return None
+
+     def store_document(
+         self,
+         collection_id: str,
+         section_id: str,
+         sub_collection_id: str,
+         document_id: str,
+         data: Model
+     ) -> None:
+         """
+         Store a document as a JSON file locally.
+
+         Args:
+             collection_id (str): the ID for the whole collection.
+             section_id (str): the ID for the section.
+             sub_collection_id (str): the ID for the sub-collection.
+             document_id (str): the ID for the document.
+             data (Model): Pydantic model instance to persist.
+         """
+         path = self._compose_path(collection_id, section_id, sub_collection_id, document_id)
+
+         try:
+             with path.open("w", encoding="utf-8") as f:
+                 json.dump(data.model_dump(), f, ensure_ascii=False, indent=2)
+             logger.info(f"[{self._CLASS_NAME}] Stored document '{document_id}' in '{path}'")
+
+         except Exception as e:
+             logger.exception(f"[{self._CLASS_NAME}] Failed to store document '{document_id}' in '{path}':\n{e}")
+
+     def query_collection(
+         self,
+         collection_id: str,
+         section_id: str,
+         sub_collection_id: str,
+         filters: Dict[str, Any],
+         model_type: Type[Model]
+     ) -> List[Model]:
+         """
+         Query all documents in a sub-collection, applying simple equality filters.
+
+         Args:
+             collection_id (str): the ID for the whole collection.
+             section_id (str): the ID for the section.
+             sub_collection_id (str): the ID for the sub-collection.
+             filters (Dict[str, Any]): Field equality filters to apply.
+             model_type (Type[Model]): Pydantic model for parsing.
+
+         Returns:
+             List[Model]: List of deserialized models that match the query.
+         """
+         path = self.base_path / collection_id / section_id / sub_collection_id
+
+         if not path.exists():
+             logger.warning(f"[{self._CLASS_NAME}] Sub-collection '{path}' not found")
+             return []
+
+         results = []
+         try:
+             for file in path.glob("*.json"):
+                 with file.open("r", encoding="utf-8") as f:
+                     data = json.load(f)
+
+                 if all(data.get(k) == v for k, v in filters.items()):
+                     results.append(model_type.model_validate(data))
+
+         except json.JSONDecodeError as e:
+             logger.error(f"[{self._CLASS_NAME}] Failed to read JSON files content:\n{e}")
+
+         except ValidationError as e:
+             logger.error(f"[{self._CLASS_NAME}] Failed to parse JSON files content:\n{e}")
+
+         return results
+
+     def delete_document(
+         self,
+         collection_id: str,
+         section_id: str,
+         sub_collection_id: str,
+         document_id: str
+     ) -> bool:
+         """Delete a JSON document from the local file system."""
+         path = self._compose_path(collection_id, section_id, sub_collection_id, document_id)
+
+         if not path.exists():
+             logger.warning(f"[{self._CLASS_NAME}] Document '{path}' not found")
+             return False
+
+         try:
+             path.unlink()
+             logger.info(f"[{self._CLASS_NAME}] Deleted document '{document_id}'")
+             return True
+
+         except FileNotFoundError:
+             logger.warning(f"[{self._CLASS_NAME}] Document '{document_id}' not found")
+             return False
+
+         except Exception as e:
+             logger.exception(f"[{self._CLASS_NAME}] Failed to delete document '{document_id}':\n{e}")
+             return False
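
A minimal round-trip sketch for the repository above (illustrative only: it assumes default construction with no WorkflowConfig, so files land under ./data, and uses a throwaway Pydantic model):

from pydantic import BaseModel
from levelapp.repository.filesystem import FileSystemRepository

class Run(BaseModel):
    run_id: str
    status: str

repo = FileSystemRepository()   # no config: base path defaults to ./data
repo.connect()

# Documents are stored under <base>/<collection>/<section>/<sub_collection>/<document_id>.json
repo.store_document(
    collection_id="projects",
    section_id="demo",
    sub_collection_id="runs",
    document_id="run-001",
    data=Run(run_id="run-001", status="passed"),
)

loaded = repo.retrieve_document(
    collection_id="projects",
    section_id="demo",
    sub_collection_id="runs",
    document_id="run-001",
    model_type=Run,
)
print(loaded)

repo.close()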