remdb 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.0.dist-info/METADATA +1455 -0
  185. remdb-0.3.0.dist-info/RECORD +187 -0
  186. remdb-0.3.0.dist-info/WHEEL +4 -0
  187. remdb-0.3.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,211 @@
1
+ description: "You are THE JUDGE evaluating REM retrieval quality using recall metrics.\n\
2
+ \n**Context Recall Evaluation (inspired by RAGAS)**\n\nYour job is to evaluate whether\
3
+ \ REM query execution retrieves ALL relevant entities\nthat should be found for\
4
+ \ a given query.\n\n**Key Concept: Recall**\n\nRecall measures: \"Of all the relevant\
5
+ \ entities that SHOULD be retrieved, how many were actually retrieved?\"\n\nFormula:\
6
+ \ Retrieved relevant entities / Total relevant entities (from golden set)\n\n**The\
7
+ \ Coverage Problem:**\n\n- **High Precision, Low Recall**: Retrieved entities are\
8
+ \ relevant, but many are missing\n- **Low Precision, High Recall**: Retrieved many\
9
+ \ entities, but also grabbed irrelevant ones\n- **Goal**: High precision AND high\
10
+ \ recall\n\n**Your Task:**\n\n1. **Review expected entities** from golden set (what\
11
+ \ SHOULD be retrieved)\n2. **Review retrieved entities** from REM query\n3. **Calculate\
12
+ \ recall** - what fraction of expected entities were found?\n4. **Identify gaps**\
13
+ \ - which expected entities are missing?\n\n**Example Evaluation:**\n\nQuery: \"\
14
+ SEARCH person AI engineer with database experience\"\n\nExpected Entities (from\
15
+ \ golden set):\n- sarah-chen (person) - \"AI engineer with 5 years PostgreSQL experience\"\
16
+ \n- alice-wang (person) - \"Database administrator with ML background\"\n- eve-jones\
17
+ \ (person) - \"Data scientist with PostgreSQL expertise\"\n\nRetrieved Entities:\n\
18
+ - sarah-chen ✓ (found)\n- john-doe (not expected - false positive)\n- alice-wang\
19
+ \ ✓ (found)\n- bob-smith (not expected - false positive)\n\nRecall Calculation:\n\
20
+ - Found: sarah-chen, alice-wang (2 entities)\n- Expected: sarah-chen, alice-wang,\
21
+ \ eve-jones (3 entities)\n- Recall: 2/3 = 0.67 (67%)\n\nMissing: eve-jones (why?\
22
+ \ Bad embedding? Wrong query parsing?)\n\n**Recall Criteria:**\n\nFor each expected\
23
+ \ entity from golden set:\n1. Was it retrieved? (present in results)\n2. If not,\
24
+ \ why might it be missing?\n - Embedding quality issue?\n - Query parsing problem?\n\
25
+ \ - Entity missing from database?\n - Ranking too low (buried beyond top-K)?\n\
26
+ \n**Scoring Rules:**\n\n**Recall Score (0.0-1.0):**\n- 1.0: All expected entities\
27
+ \ retrieved\n- 0.8: Missing 1 expected entity (80%+ recall)\n- 0.6: Missing 2-3\
28
+ \ expected entities (60-80% recall)\n- 0.4: Missing several expected entities (40-60%\
29
+ \ recall)\n- 0.2: Missing most expected entities (20-40% recall)\n- 0.0: Missing\
30
+ \ all expected entities (0% recall)\n\n**Ranking Depth (0.0-1.0):**\n- How deep\
31
+ \ in results are expected entities found?\n- 1.0: All expected entities in top 3\
32
+ \ positions\n- 0.8: All expected entities in top 5 positions\n- 0.6: All expected\
33
+ \ entities in top 10 positions\n- 0.4: Some expected entities beyond position 10\n\
34
+ - 0.2: Expected entities buried deep in results\n- 0.0: Expected entities not found\
35
+ \ at all\n\n**Coverage Quality (0.0-1.0):**\n- Balance between recall and precision\n\
36
+ - 1.0: High recall (>0.9) AND high precision (>0.8)\n- 0.8: Good recall (>0.7) AND\
37
+ \ good precision (>0.6)\n- 0.6: Moderate recall (>0.5) AND moderate precision (>0.5)\n\
38
+ - 0.4: Poor recall or precision\n- 0.2: Very poor recall and precision\n- 0.0: Nearly\
39
+ \ zero recall or precision\n\n**YOUR ROLE: STRICT AND DIAGNOSTIC**\n\n1. **NO CELEBRATION**\
40
+ \ - Grade objectively\n2. **STRICT GRADING** - Missing entities = lower recall\n\
41
+ 3. **DIAGNOSE GAPS** - Why are expected entities missing?\n4. **RANKING DEPTH**\
42
+ \ - Are expected entities buried deep?\n\nCompare retrieved entities to expected\
43
+ \ golden set carefully.\nIdentify ALL missing entities and hypothesize why they're\
44
+ \ missing.\n"
45
+ fully_qualified_name: rem.evaluators.retrieval_recall.REMRetrievalRecallEvaluator
46
+ title: REMRetrievalRecallEvaluator
47
+ type: object
48
+ labels:
49
+ - Evaluator
50
+ - REM
51
+ - Retrieval
52
+ - Recall
53
+ - RAG
54
+ properties:
55
+ recall_score:
56
+ type: number
57
+ description: 'Recall: Retrieved expected entities / Total expected entities.
58
+
59
+ Formula: |Found ∩ Expected| / |Expected|
60
+
61
+ '
62
+ minimum: 0
63
+ maximum: 1
64
+ ranking_depth_score:
65
+ type: number
66
+ description: 'Score 0-1 for ranking depth of expected entities.
67
+
68
+ Are expected entities ranked high (top-K) or buried deep?
69
+
70
+ '
71
+ minimum: 0
72
+ maximum: 1
73
+ coverage_quality_score:
74
+ type: number
75
+ description: 'Balance between recall and precision.
76
+
77
+ Combines recall score with precision context.
78
+
79
+ '
80
+ minimum: 0
81
+ maximum: 1
82
+ retrieval_completeness_score:
83
+ type: number
84
+ description: 'Overall completeness: Average of recall_score, ranking_depth_score, and coverage_quality_score (sum/3).
85
+
86
+ '
87
+ minimum: 0
88
+ maximum: 1
89
+ pass:
90
+ type: boolean
91
+ description: 'True if recall_score >= 0.70 AND retrieval_completeness_score >=
92
+ 0.70.
93
+
94
+ '
95
+ expected_entities_found:
96
+ type: array
97
+ description: 'List of expected entities that WERE retrieved.
98
+
99
+ Include position in results.
100
+
101
+ '
102
+ items:
103
+ type: object
104
+ properties:
105
+ entity_label:
106
+ type: string
107
+ position:
108
+ type: integer
109
+ notes:
110
+ type: string
111
+ missing_expected_entities:
112
+ type: array
113
+ description: 'List of expected entities that were NOT retrieved.
114
+
115
+ Include hypothesis for why missing.
116
+
117
+ '
118
+ items:
119
+ type: object
120
+ properties:
121
+ entity_label:
122
+ type: string
123
+ entity_type:
124
+ type: string
125
+ missing_reason_hypothesis:
126
+ type: string
127
+ description: "Why might this entity be missing?\nOptions: \"embedding_quality\"\
128
+ , \"query_parsing\", \"not_in_db\",\n \"ranking_too_low\", \"\
129
+ type_filtering\", \"other\"\n"
130
+ recall_analysis:
131
+ type: string
132
+ description: "Detailed analysis of recall performance.\nExample: \"Found 3 of\
133
+ \ 4 expected entities (75% recall). Missing 'eve-jones'\n likely due\
134
+ \ to poor embedding quality - her profile mentions 'data scientist'\n \
135
+ \ not 'AI engineer' explicitly.\"\n"
136
+ ranking_depth_analysis:
137
+ type: string
138
+ description: "Analysis of where expected entities appear in results.\nExample:\
139
+ \ \"Expected entities ranked at positions 1, 3, 8. Position 8 is too deep\n\
140
+ \ for typical user queries (most users check top 5).\"\n"
141
+ false_positives:
142
+ type: array
143
+ description: 'Entities retrieved but NOT in expected set.
144
+
145
+ Note: Not necessarily wrong (golden set may be incomplete).
146
+
147
+ '
148
+ items:
149
+ type: string
150
+ strengths:
151
+ type: array
152
+ description: 'What the retrieval did well (objective).
153
+
154
+ '
155
+ items:
156
+ type: string
157
+ critical_gaps:
158
+ type: array
159
+ description: 'Major issues (missing key entities, poor coverage, etc.).
160
+
161
+ '
162
+ items:
163
+ type: string
164
+ improvement_suggestions:
165
+ type: array
166
+ description: 'Actionable suggestions to improve recall.
167
+
168
+ Example: "Improve embeddings for ''data scientist'' → ''AI engineer'' semantic
169
+ similarity"
170
+
171
+ '
172
+ items:
173
+ type: string
174
+ confidence_in_grading:
175
+ type: string
176
+ description: 'Your confidence: "high", "medium", "low"
177
+
178
+ Note: Low confidence if golden set may be incomplete
179
+
180
+ '
181
+ enum:
182
+ - high
183
+ - medium
184
+ - low
185
+ grading_notes:
186
+ type: string
187
+ description: 'Internal notes about judgment calls.
188
+
189
+ Note if golden set seems incomplete (retrieved valid entities not in expected).
190
+
191
+ '
192
+ required:
193
+ - recall_score
194
+ - ranking_depth_score
195
+ - coverage_quality_score
196
+ - retrieval_completeness_score
197
+ - pass
198
+ - expected_entities_found
199
+ - missing_expected_entities
200
+ - recall_analysis
201
+ - ranking_depth_analysis
202
+ - false_positives
203
+ - strengths
204
+ - critical_gaps
205
+ - improvement_suggestions
206
+ - confidence_in_grading
207
+ - grading_notes
208
+ version: 1.0.0
209
+ json_schema_extra:
210
+ kind: evaluator
211
+ name: rem-retrieval-recall
@@ -0,0 +1,192 @@
1
+ description: "You are THE JUDGE evaluating a REM agent's response to a SEARCH query.\n\
2
+ \n**REM SEARCH Query Pattern:**\n\nSEARCH queries perform semantic vector search\
3
+ \ across entity types:\n- Format: \"SEARCH entity_types query_text\"\n- Examples:\n\
4
+ \ - \"SEARCH person,project AI engineer with database experience\"\n - \"SEARCH\
5
+ \ technology graph database with vector support\"\n - \"SEARCH document migration\
6
+ \ planning guide\"\n\n**Expected Behavior:**\n\n1. **Semantic Ranking**: Results\
7
+ \ ranked by relevance to query\n2. **Type Filtering**: Only return requested entity\
8
+ \ types\n3. **Top-K Results**: Typically return 5-10 most relevant entities\n4.\
9
+ \ **Relevance Scores**: Include similarity scores when available\n5. **Entity Labels**:\
10
+ \ Use natural language labels (not UUIDs)\n\n**Common Errors to Catch:**\n\n1. **Wrong\
11
+ \ Entity Types**:\n - Returns person when asked for project\n - Mixes types\
12
+ \ when specific type requested\n\n2. **Poor Relevance**:\n - Returns unrelated\
13
+ \ entities\n - Missing obviously relevant entities from reference\n - Poor ranking\
14
+ \ (irrelevant results ranked high)\n\n3. **Incomplete Results**:\n - Returns fewer\
15
+ \ results than expected\n - Missing key entities from reference golden set\n\n\
16
+ 4. **Hallucinations**:\n - Invented entities not in reference\n - Made-up properties\
17
+ \ or metadata\n\n**YOUR ROLE: STRICT AND CRITICAL JUDGE**\n\n1. **NO CELEBRATION**\
18
+ \ - Grade objectively\n2. **STRICT GRADING** - Missing relevant results = points\
19
+ \ deducted\n3. **CATCH HALLUCINATIONS** - Made-up entities = FAIL\n4. **VERIFY RELEVANCE**\
20
+ \ - Are results actually related to query?\n5. **CHECK RANKING** - Are most relevant\
21
+ \ results ranked first?\n\n**Scoring Rubric:**\n\n**Relevance (0.0-1.0):**\n- 1.0:\
22
+ \ All results highly relevant to query\n- 0.8: Most results relevant, 1-2 borderline\n\
23
+ - 0.6: Several irrelevant results\n- 0.4: Many irrelevant results\n- 0.2: Mostly\
24
+ \ irrelevant\n- 0.0: Completely irrelevant or wrong types\n\n**Completeness (0.0-1.0):**\n\
25
+ - 1.0: All expected entities from reference present\n- 0.8: Missing 1 expected entity\n\
26
+ - 0.6: Missing 2-3 expected entities\n- 0.4: Missing several expected entities\n\
27
+ - 0.2: Missing most expected entities\n- 0.0: Missing all expected entities\n\n\
28
+ **Ranking Quality (0.0-1.0):**\n- 1.0: Most relevant results ranked first\n- 0.8:\
29
+ \ Good ranking with minor issues\n- 0.6: Mediocre ranking (some relevant buried)\n\
30
+ - 0.4: Poor ranking\n- 0.2: Very poor ranking\n- 0.0: No discernible ranking logic\n\
31
+ \n**Overall Score:** Average of 3 dimensions\n**Pass Threshold:** >= 0.70 (slightly\
32
+ \ lower than LOOKUP - semantic matching is harder)\n\nCompare agent results to reference\
33
+ \ golden set. Check relevance, completeness, ranking.\n"
34
+ fully_qualified_name: rem.evaluators.search_correctness.REMSearchCorrectnessEvaluator
35
+ title: REMSearchCorrectnessEvaluator
36
+ type: object
37
+ labels:
38
+ - Evaluator
39
+ - REM
40
+ - SEARCH
41
+ - Correctness
42
+ - Semantic
43
+ properties:
44
+ relevance_score:
45
+ type: number
46
+ description: 'Score 0-1 for relevance of returned entities to query.
47
+
48
+ Are results semantically related to query text?
49
+
50
+ Are entity types correct?
51
+
52
+ '
53
+ minimum: 0
54
+ maximum: 1
55
+ completeness_score:
56
+ type: number
57
+ description: 'Score 0-1 for completeness compared to reference.
58
+
59
+ Are all expected entities from reference present?
60
+
61
+ Are key relevant entities included?
62
+
63
+ '
64
+ minimum: 0
65
+ maximum: 1
66
+ ranking_quality_score:
67
+ type: number
68
+ description: 'Score 0-1 for ranking quality.
69
+
70
+ Are most relevant results ranked first?
71
+
72
+ Is there clear relevance ordering?
73
+
74
+ '
75
+ minimum: 0
76
+ maximum: 1
77
+ overall_score:
78
+ type: number
79
+ description: 'Average of relevance + completeness + ranking_quality (sum/3).
80
+
81
+ '
82
+ minimum: 0
83
+ maximum: 1
84
+ pass:
85
+ type: boolean
86
+ description: 'True if overall_score >= 0.70 AND relevance_score >= 0.5
87
+
88
+ AND no hallucinated entities detected.
89
+
90
+ '
91
+ relevance_details:
92
+ type: string
93
+ description: 'Assessment of result relevance to query.
94
+
95
+ Example: "First 3 results highly relevant, last 2 borderline"
96
+
97
+ '
98
+ completeness_details:
99
+ type: string
100
+ description: 'Comparison to reference golden set.
101
+
102
+ Example: "Missing ''sarah-chen'' person entity expected in top results"
103
+
104
+ '
105
+ ranking_details:
106
+ type: string
107
+ description: 'Assessment of ranking quality.
108
+
109
+ Example: "Most relevant entity ranked #3 (should be #1)"
110
+
111
+ '
112
+ hallucinations_detected:
113
+ type: array
114
+ description: 'List of entities in results but not in reference.
115
+
116
+ May not be errors (new data) but flag for review.
117
+
118
+ '
119
+ items:
120
+ type: string
121
+ missing_expected_entities:
122
+ type: array
123
+ description: 'List of entities in reference but missing from results.
124
+
125
+ '
126
+ items:
127
+ type: string
128
+ irrelevant_results:
129
+ type: array
130
+ description: 'List of results that don''t match query intent.
131
+
132
+ '
133
+ items:
134
+ type: string
135
+ strengths:
136
+ type: array
137
+ description: 'What the search did well (objective).
138
+
139
+ '
140
+ items:
141
+ type: string
142
+ critical_gaps:
143
+ type: array
144
+ description: 'Major issues (missing key results, wrong types, etc.).
145
+
146
+ '
147
+ items:
148
+ type: string
149
+ improvement_suggestions:
150
+ type: array
151
+ description: 'Actionable suggestions to improve search quality.
152
+
153
+ '
154
+ items:
155
+ type: string
156
+ confidence_in_grading:
157
+ type: string
158
+ description: 'Your confidence: "high", "medium", "low"
159
+
160
+ (Semantic matching is subjective - lower confidence OK)
161
+
162
+ '
163
+ enum:
164
+ - high
165
+ - medium
166
+ - low
167
+ grading_notes:
168
+ type: string
169
+ description: 'Internal notes about judgment calls or edge cases.
170
+
171
+ '
172
+ required:
173
+ - relevance_score
174
+ - completeness_score
175
+ - ranking_quality_score
176
+ - overall_score
177
+ - pass
178
+ - relevance_details
179
+ - completeness_details
180
+ - ranking_details
181
+ - hallucinations_detected
182
+ - missing_expected_entities
183
+ - irrelevant_results
184
+ - strengths
185
+ - critical_gaps
186
+ - improvement_suggestions
187
+ - confidence_in_grading
188
+ - grading_notes
189
+ version: 1.0.0
190
+ json_schema_extra:
191
+ kind: evaluator
192
+ name: rem-search-correctness
@@ -0,0 +1,16 @@
1
+ """
2
+ REM Services
3
+
4
+ Service layer for REM system operations:
5
+ - PostgresService: PostgreSQL/CloudNativePG database operations
6
+ - RemService: REM query execution and graph operations
7
+
8
+ FileSystemService is re-exported here; for other file/S3 operations, use rem.services.fs:
9
+ from rem.services.fs import FS, S3Provider
10
+ """
11
+
12
+ from .fs.service import FileSystemService
13
+ from .postgres import PostgresService
14
+ from .rem import RemService
15
+
16
+ __all__ = ["PostgresService", "RemService", "FileSystemService"]