remdb 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.0.dist-info/METADATA +1455 -0
  185. remdb-0.3.0.dist-info/RECORD +187 -0
  186. remdb-0.3.0.dist-info/WHEEL +4 -0
  187. remdb-0.3.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,17 @@
1
+ """
2
+ Git service for versioned schema and experiment syncing.
3
+
4
+ Provides high-level operations for working with versioned agent schemas,
5
+ evaluators, and experiments stored in Git repositories.
6
+
7
+ Usage:
8
+ from rem.services.git import GitService
9
+
10
+ git_svc = GitService()
11
+ versions = git_svc.list_schema_versions("cv-parser")
12
+ schema = git_svc.load_schema("cv-parser", version="v2.1.0")
13
+ """
14
+
15
+ from rem.services.git.service import GitService
16
+
17
+ __all__ = ["GitService"]
@@ -0,0 +1,469 @@
1
+ """
2
+ Git Service for semantic versioning and schema evolution tracking.
3
+
4
+ Provides high-level operations for working with versioned agent schemas,
5
+ evaluators, and experiments stored in Git repositories. Wraps GitProvider
6
+ with business logic and semantic versioning awareness.
7
+
8
+ **Key Concepts**:
9
+ 1. **Schema Versioning**: Track agent schema evolution using semantic versions
10
+ 2. **Reproducible Evaluations**: Pin experiments to specific schema versions
11
+ 3. **Migration Planning**: Compare versions to identify breaking changes
12
+ 4. **Audit Trail**: Track who changed what and when
13
+
14
+ **Architecture**:
15
+ ```
16
+ GitService (this file)
17
+     ↓
18
+ FS.git_provider (thin wrapper)
19
+     ↓
20
+ GitProvider (git operations)
21
+     ↓
22
+ GitPython (git CLI wrapper)
23
+ ```
24
+
25
+ **Use Cases**:
26
+
27
+ 1. **Schema Registry Pattern**:
28
+ ```python
29
+ git_svc = GitService()
30
+
31
+ # List available schema versions
32
+ versions = git_svc.list_schema_versions("cv-parser")
33
+
34
+ # Load specific version
35
+ schema = git_svc.load_schema("cv-parser", version="v2.1.0")
36
+
37
+ # Load latest version
38
+ schema = git_svc.load_schema("cv-parser")
39
+ ```
40
+
41
+ 2. **Version Comparison**:
42
+ ```python
43
+ # Compare two versions
44
+ diff = git_svc.compare_schemas("cv-parser", "v2.0.0", "v2.1.0")
45
+
46
+ # Check for breaking changes
47
+ if git_svc.has_breaking_changes("cv-parser", "v2.0.0", "v2.1.0"):
48
+ print("⚠️ Breaking changes detected!")
49
+ ```
50
+
51
+ 3. **Experiment Pinning**:
52
+ ```python
53
+ # Run experiment with pinned schema version
54
+ schema = git_svc.load_schema("cv-parser", version="v2.1.0")
55
+ experiment = git_svc.load_experiment("hello-world", version="v1.0.0")
56
+
57
+ # Log version metadata
58
+ metadata = {
59
+ "schema_version": "v2.1.0",
60
+ "experiment_version": "v1.0.0",
61
+ "schema_commit": git_svc.get_commit("schemas/cv-parser.yaml", "v2.1.0")
62
+ }
63
+ ```
64
+
65
+ 4. **Multi-Tenant Schema Management**:
66
+ ```python
67
+ # Each tenant can use different schema versions
68
+ tenant_a_schema = git_svc.load_schema("cv-parser", version="v2.0.0")
69
+ tenant_b_schema = git_svc.load_schema("cv-parser", version="v2.1.0")
70
+ ```
71
+
72
+ **Integration with Agent Factory**:
73
+ ```python
74
+ from rem.services.git import GitService
75
+ from rem.agentic.factory import create_agent
76
+
77
+ git_svc = GitService()
78
+
79
+ # Load schema from git
80
+ schema_content = git_svc.load_schema("cv-parser", version="v2.1.0")
81
+
82
+ # Create agent
83
+ agent = create_agent(schema_content)
84
+
85
+ # Run agent
86
+ result = await agent.run("Extract from resume...")
87
+ ```
88
+
89
+ **CLI Integration**:
90
+ ```bash
91
+ # List schema versions
92
+ rem git schema list cv-parser
93
+
94
+ # Compare versions
95
+ rem git schema diff cv-parser v2.0.0 v2.1.0
96
+
97
+ # Load schema at version
98
+ rem git schema show cv-parser --version v2.1.0
99
+
100
+ # Sync repo (pull latest changes)
101
+ rem git sync
102
+ ```
103
+ """
104
+
105
+ from typing import Any, TYPE_CHECKING
106
+ from pathlib import Path
107
+
108
+ from loguru import logger
109
+
110
+ from rem.settings import settings
111
+
112
+ if TYPE_CHECKING:
113
+ from rem.services.fs import FS
114
+
115
+
116
+ class GitService:
117
+ """
118
+ High-level Git operations for versioned schemas and experiments.
119
+
120
+ Provides semantic versioning awareness, schema comparison, and
121
+ migration planning utilities. Wraps GitProvider with business logic.
122
+
123
+ **Path Conventions**:
124
+ - Agent schemas: schemas/agents/{agent_name}.yaml
125
+ - Evaluators: schemas/evaluators/{agent_name}/{evaluator_name}.yaml
126
+ - Experiments: experiments/{experiment_name}/
127
+
128
+ **Version Format**: Semantic versioning (MAJOR.MINOR.PATCH)
129
+ - Tags use format: schemas/{agent_name}/vX.Y.Z (e.g., schemas/test/v2.1.0)
130
+ - Can use patterns: v2.* (all v2 versions)
131
+
132
+ Attributes:
133
+ fs: Filesystem interface with Git provider
134
+ schemas_dir: Directory for agent schemas (default: schemas/agents)
135
+ experiments_dir: Directory for experiments (default: experiments/)
136
+
137
+ Examples:
138
+ >>> git_svc = GitService()
139
+ >>> versions = git_svc.list_schema_versions("cv-parser")
140
+ >>> schema = git_svc.load_schema("cv-parser", version="schemas/cv-parser/v2.1.0")
141
+ >>> diff = git_svc.compare_schemas("cv-parser", "schemas/cv-parser/v2.0.0", "schemas/cv-parser/v2.1.0")
142
+ """
143
+
144
+ def __init__(
145
+ self,
146
+ fs: "FS | None" = None,
147
+ schemas_dir: str = "rem/schemas/agents",
148
+ experiments_dir: str = "rem/experiments",
149
+ ):
150
+ """
151
+ Initialize Git service.
152
+
153
+ Args:
154
+ fs: Filesystem interface (creates new FS() if None)
155
+ schemas_dir: Directory for agent schemas (default: rem/schemas/agents)
156
+ experiments_dir: Directory for experiments (default: rem/experiments)
157
+
158
+ Raises:
159
+ ValueError: If Git provider is not enabled
160
+ """
161
+ # Import here to avoid circular dependency
162
+ from rem.services.fs import FS
163
+ self.fs = fs or FS()
164
+
165
+ if not settings.git.enabled or not self.fs._git_provider:
166
+ raise ValueError(
167
+ "Git provider not enabled. Set GIT__ENABLED=true and GIT__DEFAULT_REPO_URL"
168
+ )
169
+
170
+ # Type guard: git provider is guaranteed to exist after the check above
171
+ assert self.fs._git_provider is not None
172
+
173
+ self.schemas_dir = schemas_dir
174
+ self.experiments_dir = experiments_dir
175
+
176
+ logger.info("Initialized GitService")
177
+
178
+ def list_schema_versions(
179
+ self,
180
+ schema_name: str,
181
+ pattern: str | None = None
182
+ ) -> list[dict[str, Any]]:
183
+ """
184
+ List all semantic versions of a schema.
185
+
186
+ Returns versions sorted by semver (newest first) with commit metadata.
187
+
188
+ Args:
189
+ schema_name: Schema name (e.g., "cv-parser", "contract-analyzer")
190
+ pattern: Optional version pattern (e.g., "v2\\..*" for v2.x.x only)
191
+
192
+ Returns:
193
+ List of version dicts:
194
+ [
195
+ {
196
+ "tag": "v2.1.1",
197
+ "version": (2, 1, 1),
198
+ "commit": "abc123...",
199
+ "date": "2025-01-15T10:30:00",
200
+ "message": "feat: Add confidence scoring",
201
+ "author": "alice@example.com"
202
+ },
203
+ ...
204
+ ]
205
+
206
+ Examples:
207
+ >>> versions = git_svc.list_schema_versions("cv-parser")
208
+ >>> print(f"Latest: {versions[0]['tag']}")
209
+ Latest: v2.1.1
210
+
211
+ >>> v2_versions = git_svc.list_schema_versions("cv-parser", pattern="v2\\..*")
212
+ >>> print(f"Latest v2: {v2_versions[0]['tag']}")
213
+ Latest v2: v2.1.1
214
+ """
215
+ schema_path = f"{self.schemas_dir}/{schema_name}.yaml"
216
+
217
+ # Type guard: git provider exists (validated in __init__)
218
+ assert self.fs._git_provider is not None
219
+
220
+ versions = self.fs._git_provider.get_semantic_versions(
221
+ schema_path,
222
+ pattern=pattern
223
+ )
224
+
225
+ logger.info(
226
+ f"Found {len(versions)} versions for schema '{schema_name}' "
227
+ f"(pattern: {pattern or 'all'})"
228
+ )
229
+
230
+ return versions
231
+
232
+ def load_schema(
233
+ self,
234
+ schema_name: str,
235
+ version: str | None = None
236
+ ) -> dict[str, Any]:
237
+ """
238
+ Load agent schema at specific version.
239
+
240
+ Args:
241
+ schema_name: Schema name (e.g., "cv-parser")
242
+ version: Semantic version tag (e.g., "v2.1.0"), or None for latest
243
+
244
+ Returns:
245
+ Parsed schema content (dict from YAML)
246
+
247
+ Raises:
248
+ FileNotFoundError: If schema doesn't exist
249
+ ValueError: If version is invalid
250
+
251
+ Examples:
252
+ >>> # Load latest version
253
+ >>> schema = git_svc.load_schema("cv-parser")
254
+
255
+ >>> # Load specific version
256
+ >>> schema = git_svc.load_schema("cv-parser", version="v2.1.0")
257
+
258
+ >>> # Use in agent factory
259
+ >>> from rem.agentic.factory import create_agent
260
+ >>> agent = create_agent(schema)
261
+ """
262
+ schema_path = f"{self.schemas_dir}/{schema_name}.yaml"
263
+
264
+ if version:
265
+ uri = f"git://{schema_path}?ref={version}"
266
+ else:
267
+ uri = f"git://{schema_path}"
268
+
269
+ logger.info(f"Loading schema '{schema_name}' (version: {version or 'latest'})")
270
+
271
+ return self.fs.read(uri)
272
+
273
+ def compare_schemas(
274
+ self,
275
+ schema_name: str,
276
+ version1: str,
277
+ version2: str,
278
+ unified: int = 3
279
+ ) -> str:
280
+ """
281
+ Generate diff between two schema versions.
282
+
283
+ Useful for:
284
+ - Code review: What changed?
285
+ - Migration planning: Breaking changes?
286
+ - Audit trail: Who changed what?
287
+
288
+ Args:
289
+ schema_name: Schema name
290
+ version1: First version (e.g., "v2.0.0")
291
+ version2: Second version (e.g., "v2.1.0")
292
+ unified: Number of context lines
293
+
294
+ Returns:
295
+ Unified diff string (Git format)
296
+
297
+ Examples:
298
+ >>> diff = git_svc.compare_schemas("cv-parser", "v2.0.0", "v2.1.0")
299
+ >>> print(diff)
300
+ --- a/schemas/cv-parser.yaml
301
+ +++ b/schemas/cv-parser.yaml
302
+ @@ -10,6 +10,7 @@
303
+ skills:
304
+ type: array
305
+ + description: Candidate technical skills
306
+
307
+ >>> # Check for breaking changes
308
+ >>> if "required:" in diff and "-" in diff:
309
+ ... print("⚠️ Breaking change: Required field removed")
310
+ """
311
+ schema_path = f"{self.schemas_dir}/{schema_name}.yaml"
312
+
313
+ # Type guard: git provider exists (validated in __init__)
314
+ assert self.fs._git_provider is not None
315
+
316
+ diff = self.fs._git_provider.diff_versions(
317
+ schema_path,
318
+ version1,
319
+ version2,
320
+ unified=unified
321
+ )
322
+
323
+ logger.info(f"Generated diff for '{schema_name}' ({version1} → {version2})")
324
+
325
+ return diff
326
+
327
+ def has_breaking_changes(
328
+ self,
329
+ schema_name: str,
330
+ version1: str,
331
+ version2: str
332
+ ) -> bool:
333
+ """
334
+ Check if upgrade contains breaking changes.
335
+
336
+ Heuristics for breaking changes:
337
+ - Required field removed
338
+ - Field type changed
339
+ - Enum values removed
340
+ - Major version bump
341
+
342
+ Args:
343
+ schema_name: Schema name
344
+ version1: Old version
345
+ version2: New version
346
+
347
+ Returns:
348
+ True if breaking changes detected
349
+
350
+ Examples:
351
+ >>> has_breaking = git_svc.has_breaking_changes(
352
+ ... "cv-parser", "v1.2.0", "v2.0.0"
353
+ ... )
354
+ >>> if has_breaking:
355
+ ... print("⚠️ Manual migration required")
356
+ """
357
+ import re
358
+
359
+ # Extract version numbers from tags (support both v2.1.0 and schemas/test/v2.1.0)
360
+ semver_pattern = re.compile(r"v?(\d+)\.(\d+)\.(\d+)")
361
+
362
+ v1_match = semver_pattern.search(version1)
363
+ v2_match = semver_pattern.search(version2)
364
+
365
+ if not v1_match or not v2_match:
366
+ logger.warning(f"Could not parse versions: {version1}, {version2}")
367
+ return False
368
+
369
+ v1_major = int(v1_match.group(1))
370
+ v2_major = int(v2_match.group(1))
371
+
372
+ # Check major version bump
373
+ if v2_major > v1_major:
374
+ logger.warning(
375
+ f"Major version bump detected: {version1} → {version2}"
376
+ )
377
+ return True
378
+
379
+ # Check diff for breaking change patterns
380
+ diff = self.compare_schemas(schema_name, version1, version2)
381
+
382
+ breaking_patterns = [
383
+ "- required:", # Required field removed
384
+ "- type:", # Type changed
385
+ "- enum:", # Enum values removed
386
+ ]
387
+
388
+ for pattern in breaking_patterns:
389
+ if pattern in diff:
390
+ logger.warning(
391
+ f"Breaking change pattern '{pattern}' found in diff"
392
+ )
393
+ return True
394
+
395
+ return False
396
+
397
+ def load_experiment(
398
+ self,
399
+ experiment_name: str,
400
+ version: str | None = None
401
+ ) -> dict[str, Any]:
402
+ """
403
+ Load experiment configuration at specific version.
404
+
405
+ Args:
406
+ experiment_name: Experiment name (e.g., "hello-world")
407
+ version: Version tag, or None for latest
408
+
409
+ Returns:
410
+ Experiment metadata and configuration
411
+
412
+ Examples:
413
+ >>> exp = git_svc.load_experiment("hello-world", version="v1.0.0")
414
+ >>> ground_truth = exp["datasets"]["ground_truth"]
415
+ """
416
+ exp_path = f"{self.experiments_dir}/{experiment_name}/config.yaml"
417
+
418
+ if version:
419
+ uri = f"git://{exp_path}?ref={version}"
420
+ else:
421
+ uri = f"git://{exp_path}"
422
+
423
+ logger.info(f"Loading experiment '{experiment_name}' (version: {version or 'latest'})")
424
+
425
+ return self.fs.read(uri)
426
+
427
+ def sync(self):
428
+ """
429
+ Sync repository (pull latest changes).
430
+
431
+ Clears cache and forces fresh clone on next access.
432
+ Useful for periodic updates or manual refresh.
433
+
434
+ Examples:
435
+ >>> # Cron job: sync every 5 minutes
436
+ >>> git_svc.sync()
437
+
438
+ >>> # Manual refresh after schema update
439
+ >>> git_svc.sync()
440
+ >>> schema = git_svc.load_schema("cv-parser") # Gets latest
441
+ """
442
+ # Type guard: git provider exists (validated in __init__)
443
+ assert self.fs._git_provider is not None
444
+
445
+ self.fs._git_provider.clear_cache()
446
+ logger.info("Cleared Git cache - next access will fetch latest changes")
447
+
448
+ def get_commit(self, path: str, version: str) -> str:
449
+ """
450
+ Get commit hash for file at specific version.
451
+
452
+ Useful for tracking exact version loaded for reproducibility.
453
+
454
+ Args:
455
+ path: File path in repository
456
+ version: Version tag
457
+
458
+ Returns:
459
+ Full commit hash (40 characters)
460
+
461
+ Examples:
462
+ >>> commit = git_svc.get_commit("schemas/cv-parser.yaml", "v2.1.0")
463
+ >>> print(f"Loaded from commit: {commit[:8]}")
464
+ Loaded from commit: abc12345
465
+ """
466
+ # Type guard: git provider exists (validated in __init__)
467
+ assert self.fs._git_provider is not None
468
+
469
+ return self.fs._git_provider.get_current_commit(version)