remdb 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.0.dist-info/METADATA +1455 -0
  185. remdb-0.3.0.dist-info/RECORD +187 -0
  186. remdb-0.3.0.dist-info/WHEEL +4 -0
  187. remdb-0.3.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1075 @@
1
+ # Git Provider for Versioned Schema & Experiment Syncing
2
+
3
+ REM's Git provider enables syncing of agent schemas, evaluators, and experiments from Git repositories with full semantic versioning support. It is designed for Kubernetes cluster environments with proper secret management.
4
+
5
+ ## Table of Contents
6
+
7
+ - [Quick Start](#quick-start)
8
+ - [Architecture](#architecture)
9
+ - [Authentication](#authentication)
10
+ - [URI Format](#uri-format)
11
+ - [Semantic Versioning](#semantic-versioning)
12
+ - [Kubernetes Deployment](#kubernetes-deployment)
13
+ - [Use Cases](#use-cases)
14
+ - [API Reference](#api-reference)
15
+ - [Security Best Practices](#security-best-practices)
16
+ - [Performance & Caching](#performance--caching)
17
+ - [Troubleshooting](#troubleshooting)
18
+
19
+ ---
20
+
21
+ ## Quick Start
22
+
23
+ ### Installation
24
+
25
+ ```bash
26
+ # Add GitPython dependency
27
+ cd rem
28
+ uv add GitPython
29
+
30
+ # Or with pip
31
+ pip install GitPython
32
+ ```
33
+
34
+ ### Configuration
35
+
36
+ ```bash
37
+ # Enable Git provider
38
+ export GIT__ENABLED=true
39
+ export GIT__DEFAULT_REPO_URL="ssh://git@github.com/my-org/my-repo.git"
40
+
41
+ # Optional: Configure cache and SSH paths
42
+ export GIT__CACHE_DIR="/tmp/rem-git-cache"
43
+ export GIT__SSH_KEY_PATH="/etc/git-secret/ssh"
44
+ export GIT__KNOWN_HOSTS_PATH="/etc/git-secret/known_hosts"
45
+ ```
46
+
47
+ ### Basic Usage
48
+
49
+ ```python
50
+ from rem.services.fs import FS
51
+ from rem.services.git import GitService
52
+
53
+ # Filesystem interface (low-level)
54
+ fs = FS()
55
+ schema = fs.read("git://schemas/cv-parser.yaml?ref=v2.1.0")
56
+ schemas = fs.ls("git://schemas/")
57
+
58
+ # GitService interface (high-level, recommended)
59
+ git_svc = GitService()
60
+
61
+ # List schema versions
62
+ versions = git_svc.list_schema_versions("cv-parser")
63
+ print(f"Latest: {versions[0]['tag']}") # v2.1.1
64
+
65
+ # Load specific version
66
+ schema = git_svc.load_schema("cv-parser", version="v2.1.0")
67
+
68
+ # Compare versions
69
+ diff = git_svc.compare_schemas("cv-parser", "v2.0.0", "v2.1.0")
70
+ print(diff)
71
+
72
+ # Check for breaking changes
73
+ if git_svc.has_breaking_changes("cv-parser", "v2.0.0", "v2.1.0"):
74
+     print("⚠️ Manual migration required")
75
+ ```
76
+
77
+ ---
78
+
79
+ ## Architecture
80
+
81
+ ### Component Overview
82
+
83
+ ```
84
+ GitService (High-level semantic operations)
85
+
86
+ FS.git_provider (Thin wrapper for FS interface)
87
+
88
+ GitProvider (Git operations with caching)
89
+
90
+ GitPython (Git CLI wrapper)
91
+
92
+ Git CLI (System git command)
93
+ ```
94
+
95
+ ### Path Conventions
96
+
97
+ ```
98
+ Repository Structure:
99
+ repo/
100
+ ├── schemas/ # Agent schemas
101
+ │ ├── cv-parser.yaml # git://schemas/cv-parser.yaml
102
+ │ ├── contract-analyzer.yaml
103
+ │ └── evaluators/ # Evaluator schemas
104
+ │ ├── cv-correctness.yaml
105
+ │ └── contract-risk.yaml
106
+ └── experiments/ # Evaluation experiments
107
+ ├── hello-world/
108
+ │ ├── config.yaml
109
+ │ └── ground_truth.csv
110
+ └── cv-parser-test/
111
+ ├── config.yaml
112
+ └── resumes/
113
+ ```
114
+
115
+ ### Caching Strategy
116
+
117
+ ```
118
+ Local Cache Structure:
119
+ /tmp/rem-git-cache/
120
+ └── {repo_hash}/ # SHA256 hash of repo URL
121
+ ├── main/ # Default branch
122
+ │ ├── schemas/
123
+ │ └── experiments/
124
+ ├── v2.1.0/ # Tag
125
+ │ └── schemas/
126
+ └── v2.1.1/ # Tag
127
+ └── schemas/
128
+
129
+ Cache Invalidation:
130
+ - Manual: git_svc.sync() or provider.clear_cache()
131
+ - Automatic: Configurable sync interval (default: 5 minutes)
132
+ - Per-ref: Cache cleared per tag/branch
133
+ ```
134
+
135
+ ---
136
+
137
+ ## Authentication
138
+
139
+ ### Method 1: SSH Keys (Recommended for Production)
140
+
141
+ **Setup**:
142
+ ```bash
143
+ # Generate SSH key (if needed)
144
+ ssh-keygen -t ed25519 -C "rem-cluster@example.com" -f ~/.ssh/rem_deploy_key
145
+
146
+ # Add public key as deploy key in GitHub/GitLab
147
+ # Settings → Deploy keys → Add deploy key
148
+ # ✓ Read-only access
149
+ # ✗ Write access (not needed)
150
+
151
+ # Configure REM
152
+ export GIT__SSH_KEY_PATH="$HOME/.ssh/rem_deploy_key"
153
+ export GIT__KNOWN_HOSTS_PATH="$HOME/.ssh/known_hosts"
154
+ ```
155
+
156
+ **Advantages**:
157
+ - ✅ No rate limits
158
+ - ✅ Full Git protocol support
159
+ - ✅ Works with private repos
160
+ - ✅ More secure (no token in environment)
161
+
162
+ **Known Hosts Setup**:
163
+ ```bash
164
+ # Add GitHub to known_hosts
165
+ ssh-keyscan github.com >> ~/.ssh/known_hosts
166
+
167
+ # Add GitLab to known_hosts
168
+ ssh-keyscan gitlab.com >> ~/.ssh/known_hosts
169
+
170
+ # Add self-hosted Git server
171
+ ssh-keyscan git.example.com >> ~/.ssh/known_hosts
172
+ ```
173
+
174
+ ### Method 2: HTTPS with Personal Access Token
175
+
176
+ **Setup**:
177
+ ```bash
178
+ # Create PAT in GitHub/GitLab
179
+ # GitHub: Settings → Developer settings → Personal access tokens → Fine-grained tokens
180
+ # Permissions: Contents (read-only)
181
+
182
+ export GIT__PERSONAL_ACCESS_TOKEN="ghp_xxxxxxxxxxxxxxxxxxxx"
183
+ export GIT__DEFAULT_REPO_URL="https://github.com/my-org/my-repo.git"
184
+ ```
185
+
186
+ **Rate Limits**:
187
+ - GitHub: 5,000 API requests/hour per authenticated user
188
+ - GitLab: 2,000 API requests/hour per user
189
+ - Bitbucket: 1,000 API requests/hour per user
190
+
191
+ **Advantages**:
192
+ - ✅ Easier local development setup
193
+ - ✅ Works with corporate proxies
194
+ - ✅ Fine-grained permissions (GitHub)
195
+
196
+ **Disadvantages**:
197
+ - ❌ Rate limits apply
198
+ - ❌ Token in environment variable
199
+ - ❌ Token rotation required
200
+
201
+ ---
202
+
203
+ ## URI Format
204
+
205
+ ### Syntax
206
+
207
+ ```
208
+ git://{path}[?ref={version}]
209
+
210
+ Where:
211
+ - path: Path within repository (e.g., "schemas/cv-parser.yaml")
212
+ - ref: Optional Git reference (branch, tag, or commit hash)
213
+ ```
214
+
215
+ ### Examples
216
+
217
+ ```python
218
+ # Read from default branch (main)
219
+ fs.read("git://schemas/cv-parser.yaml")
220
+
221
+ # Read from specific tag
222
+ fs.read("git://schemas/cv-parser.yaml?ref=v2.1.0")
223
+
224
+ # Read from branch
225
+ fs.read("git://schemas/cv-parser.yaml?ref=feature-branch")
226
+
227
+ # Read from commit hash
228
+ fs.read("git://schemas/cv-parser.yaml?ref=abc123def456")
229
+
230
+ # List directory
231
+ fs.ls("git://schemas/")
232
+ fs.ls("git://experiments/hello-world/?ref=v1.0.0")
233
+
234
+ # Check existence
235
+ fs.exists("git://schemas/cv-parser.yaml?ref=v2.1.0")
236
+ ```
237
+
238
+ ---
239
+
240
+ ## Semantic Versioning
241
+
242
+ ### Version Tracking
243
+
244
+ REM follows [Semantic Versioning 2.0.0](https://semver.org/):
245
+
246
+ ```
247
+ MAJOR.MINOR.PATCH
248
+
249
+ Examples:
250
+ - v2.1.0 → v2.1.1: PATCH (bug fix, backwards compatible)
251
+ - v2.1.1 → v2.2.0: MINOR (new feature, backwards compatible)
252
+ - v2.2.0 → v3.0.0: MAJOR (breaking change, not backwards compatible)
253
+ ```
254
+
255
+ ### Get Version History
256
+
257
+ ```python
258
+ from rem.services.git import GitService
259
+
260
+ git_svc = GitService()
261
+
262
+ # Get all versions
263
+ versions = git_svc.list_schema_versions("cv-parser")
264
+
265
+ for v in versions:
266
+     print(f"{v['tag']}: {v['message']} by {v['author']} on {v['date']}")
267
+
268
+ # Output:
269
+ # v2.1.1: feat: Add confidence scoring by alice@example.com on 2025-01-15T10:30:00
270
+ # v2.1.0: feat: Add multi-language support by bob@example.com on 2025-01-10T14:20:00
271
+ # v2.0.0: feat!: Redesign output schema by alice@example.com on 2025-01-05T09:00:00
272
+ ```
273
+
274
+ ### Filter by Version Pattern
275
+
276
+ ```python
277
+ # Get only v2.x.x versions
278
+ v2_versions = git_svc.list_schema_versions("cv-parser", pattern="v2\\..*")
279
+
280
+ # Get only v2.1.x versions
281
+ v2_1_versions = git_svc.list_schema_versions("cv-parser", pattern="v2\\.1\\..*")
282
+ ```
283
+
284
+ ### Compare Versions
285
+
286
+ ```python
287
+ # Get diff between versions
288
+ diff = git_svc.compare_schemas("cv-parser", "v2.1.0", "v2.1.1")
289
+ print(diff)
290
+
291
+ # Output:
292
+ # --- a/schemas/cv-parser.yaml
293
+ # +++ b/schemas/cv-parser.yaml
294
+ # @@ -10,6 +10,8 @@
295
+ # skills:
296
+ # type: array
297
+ # + items:
298
+ # + type: string
299
+ ```
300
+
301
+ ### Breaking Change Detection
302
+
303
+ ```python
304
+ # Check for breaking changes
305
+ has_breaking = git_svc.has_breaking_changes("cv-parser", "v2.1.0", "v3.0.0")
306
+
307
+ if has_breaking:
308
+     print("⚠️ Breaking changes detected!")
309
+     print("Manual migration required.")
310
+
311
+     # Show diff
312
+     diff = git_svc.compare_schemas("cv-parser", "v2.1.0", "v3.0.0")
313
+     print(diff)
314
+ ```
315
+
316
+ ---
317
+
318
+ ## Kubernetes Deployment
319
+
320
+ ### Secret Creation
321
+
322
+ ```bash
323
+ # Create Kubernetes Secret with SSH key
324
+ kubectl create secret generic git-creds \
325
+ --from-file=ssh=$HOME/.ssh/rem_deploy_key \
326
+ --from-file=known_hosts=$HOME/.ssh/known_hosts \
327
+ --namespace rem-app
328
+
329
+ # Verify secret
330
+ kubectl get secret git-creds -n rem-app -o yaml
331
+ ```
332
+
333
+ ### Deployment YAML
334
+
335
+ ```yaml
336
+ apiVersion: apps/v1
337
+ kind: Deployment
338
+ metadata:
339
+ name: rem-api
340
+ namespace: rem-app
341
+ spec:
342
+ replicas: 2
343
+ selector:
344
+ matchLabels:
345
+ app: rem-api
346
+ template:
347
+ metadata:
348
+ labels:
349
+ app: rem-api
350
+ spec:
351
+ # Security context for SSH key permissions
352
+ securityContext:
353
+ fsGroup: 65533 # git user group
354
+
355
+ volumes:
356
+ # Mount Git credentials from Secret
357
+ - name: git-secret
358
+ secret:
359
+ secretName: git-creds
360
+ defaultMode: 0400 # Read-only for owner
361
+
362
+ containers:
363
+ - name: rem-api
364
+ image: percolationlabs/rem:latest
365
+
366
+ env:
367
+ # Enable Git provider
368
+ - name: GIT__ENABLED
369
+ value: "true"
370
+ - name: GIT__DEFAULT_REPO_URL
371
+ value: "ssh://git@github.com/my-org/my-repo.git"
372
+ - name: GIT__SSH_KEY_PATH
373
+ value: "/etc/git-secret/ssh"
374
+ - name: GIT__KNOWN_HOSTS_PATH
375
+ value: "/etc/git-secret/known_hosts"
376
+ - name: GIT__CACHE_DIR
377
+ value: "/app/git-cache"
378
+ - name: GIT__SHALLOW_CLONE
379
+ value: "true"
380
+
381
+ volumeMounts:
382
+ # Mount Git credentials
383
+ - name: git-secret
384
+ mountPath: /etc/git-secret
385
+ readOnly: true
386
+
387
+ resources:
388
+ requests:
389
+ memory: "256Mi"
390
+ cpu: "100m"
391
+ limits:
392
+ memory: "512Mi"
393
+ cpu: "500m"
394
+ ```
395
+
396
+ ### Git-Sync Sidecar Pattern (Alternative)
397
+
398
+ ```yaml
399
+ apiVersion: apps/v1
400
+ kind: Deployment
401
+ metadata:
402
+ name: rem-api-with-git-sync
403
+ spec:
404
+ template:
405
+ spec:
406
+ volumes:
407
+ - name: git-secret
408
+ secret:
409
+ secretName: git-creds
410
+ - name: git-repo
411
+ emptyDir: {}
412
+
413
+ containers:
414
+ # Main application container
415
+ - name: rem-api
416
+ image: percolationlabs/rem:latest
417
+ env:
418
+ - name: GIT__ENABLED
419
+ value: "false" # Use git-sync instead
420
+ volumeMounts:
421
+ - name: git-repo
422
+ mountPath: /app/git-repo
423
+ readOnly: true
424
+
425
+ # Git-sync sidecar (keeps repo in sync)
426
+ - name: git-sync
427
+ image: registry.k8s.io/git-sync/git-sync:v4.0.0
428
+ env:
429
+ - name: GITSYNC_REPO
430
+ value: "ssh://git@github.com/my-org/my-repo.git"
431
+ - name: GITSYNC_ROOT
432
+ value: "/git"
433
+ - name: GITSYNC_LINK
434
+ value: "repo"
435
+ - name: GITSYNC_PERIOD
436
+ value: "30s" # Sync every 30 seconds
437
+ - name: GITSYNC_SSH_KEY_FILE
438
+ value: "/etc/git-secret/ssh"
439
+ volumeMounts:
440
+ - name: git-repo
441
+ mountPath: /git
442
+ - name: git-secret
443
+ mountPath: /etc/git-secret
444
+ readOnly: true
445
+ ```
446
+
447
+ ---
448
+
449
+ ## Use Cases
450
+
451
+ ### 1. Schema Versioning
452
+
453
+ **Problem**: Need to track agent schema evolution and compare versions.
454
+
455
+ ```python
456
+ from rem.services.git import GitService
457
+
458
+ git_svc = GitService()
459
+
460
+ # Production uses v2.1.0
461
+ prod_schema = git_svc.load_schema("cv-parser", version="v2.1.0")
462
+
463
+ # Staging tests v2.1.1
464
+ staging_schema = git_svc.load_schema("cv-parser", version="v2.1.1")
465
+
466
+ # Compare to see what changed
467
+ diff = git_svc.compare_schemas("cv-parser", "v2.1.0", "v2.1.1")
468
+ print("Changes in staging:")
469
+ print(diff)
470
+ ```
471
+
472
+ ### 2. Reproducible Evaluations
473
+
474
+ **Problem**: Need to ensure evaluations use the exact same schema version.
475
+
476
+ ```python
477
+ from rem.services.git import GitService
478
+ from rem.agentic.factory import create_pydantic_ai_agent
479
+
480
+ git_svc = GitService()
481
+
482
+ # Load pinned versions
483
+ schema = git_svc.load_schema("cv-parser", version="v2.1.0")
484
+ experiment = git_svc.load_experiment("cv-eval", version="v1.0.0")
485
+
486
+ # Create agent from versioned schema
487
+ agent = create_pydantic_ai_agent(schema)
488
+
489
+ # Log exact versions for reproducibility
490
+ metadata = {
491
+ "schema_version": "v2.1.0",
492
+ "schema_commit": git_svc.get_commit("schemas/cv-parser.yaml", "v2.1.0"),
493
+ "experiment_version": "v1.0.0",
494
+ "timestamp": datetime.now().isoformat()
495
+ }
496
+
497
+ # Run evaluation
498
+ result = await agent.run(experiment["test_input"])
499
+ ```
500
+
501
+ ### 3. Multi-Tenant Schema Management
502
+
503
+ **Problem**: Different tenants need different schema versions.
504
+
505
+ ```python
506
+ from rem.services.git import GitService
507
+
508
+ git_svc = GitService()
509
+
510
+ # Tenant configuration
511
+ TENANT_SCHEMA_VERSIONS = {
512
+ "acme-corp": "v2.0.0", # Conservative, stable
513
+ "beta-corp": "v2.1.0", # Early adopter
514
+ "enterprise-corp": "v1.5.0", # Custom version
515
+ }
516
+
517
+ def get_tenant_schema(tenant_id: str):
518
+     version = TENANT_SCHEMA_VERSIONS.get(tenant_id, "v2.1.0") # Default for tenants not listed above
519
+     return git_svc.load_schema("cv-parser", version=version)
520
+
521
+ # Load tenant-specific schema
522
+ acme_schema = get_tenant_schema("acme-corp") # Gets v2.0.0
523
+ beta_schema = get_tenant_schema("beta-corp") # Gets v2.1.0
524
+ ```
525
+
526
+ ### 4. Migration Planning
527
+
528
+ **Problem**: Need to understand the impact of upgrading to a new schema version.
529
+
530
+ ```python
531
+ from rem.services.git import GitService
532
+
533
+ git_svc = GitService()
534
+
535
+ current_version = "v2.1.0"
536
+ target_version = "v3.0.0"
537
+
538
+ # Check for breaking changes
539
+ if git_svc.has_breaking_changes("cv-parser", current_version, target_version):
540
+     print(f"⚠️ Breaking changes in {target_version}")
541
+
542
+     # Get detailed diff
543
+     diff = git_svc.compare_schemas("cv-parser", current_version, target_version)
544
+
545
+     # Analyze diff for specific patterns
546
+     if "- required:" in diff:
547
+         print("❌ Required fields removed")
548
+     if "- type:" in diff:
549
+         print("❌ Field types changed")
550
+
551
+     print("\nFull diff:")
552
+     print(diff)
553
+
554
+     # Create migration plan
555
+     print("\nMigration steps:")
556
+     print("1. Update agent to handle new schema")
557
+     print("2. Test with sample data")
558
+     print("3. Deploy to staging")
559
+     print("4. Gradual rollout to production")
560
+ else:
561
+     print(f"✅ No breaking changes in {target_version}")
562
+     print("Safe to upgrade")
563
+ ```
564
+
565
+ ### 5. Cluster Jobs with Versioned Schemas
566
+
567
+ **Problem**: Kubernetes jobs need to pull specific schema versions.
568
+
569
+ ```yaml
570
+ # evaluation-job.yaml
571
+ apiVersion: batch/v1
572
+ kind: Job
573
+ metadata:
574
+ name: cv-parser-eval-v2-1-0
575
+ spec:
576
+ template:
577
+ spec:
578
+ restartPolicy: Never
579
+ volumes:
580
+ - name: git-secret
581
+ secret:
582
+ secretName: git-creds
583
+ containers:
584
+ - name: eval-runner
585
+ image: percolationlabs/rem:latest
586
+ command: ["python", "-m", "rem.cli.eval"]
587
+ args:
588
+ - "run"
589
+ - "--schema=cv-parser"
590
+ - "--version=v2.1.0"
591
+ - "--experiment=cv-eval"
592
+ - "--experiment-version=v1.0.0"
593
+ env:
594
+ - name: GIT__ENABLED
595
+ value: "true"
596
+ - name: GIT__DEFAULT_REPO_URL
597
+ value: "ssh://git@github.com/my-org/schemas.git"
598
+ volumeMounts:
599
+ - name: git-secret
600
+ mountPath: /etc/git-secret
601
+ ```
602
+
603
+ ---
604
+
605
+ ## API Reference
606
+
607
+ ### GitService
608
+
609
+ High-level semantic operations.
610
+
611
+ ```python
612
+ from rem.services.git import GitService
613
+
614
+ git_svc = GitService(
615
+ fs=None, # FS instance (creates new if None)
616
+ schemas_dir="schemas", # Schemas directory in repo
617
+ experiments_dir="experiments" # Experiments directory
618
+ )
619
+ ```
620
+
621
+ #### Methods
622
+
623
+ **`list_schema_versions(schema_name, pattern=None)`**
624
+ ```python
625
+ versions = git_svc.list_schema_versions("cv-parser")
626
+ versions = git_svc.list_schema_versions("cv-parser", pattern="v2\\..*")
627
+
628
+ # Returns: list[dict]
629
+ # [
630
+ # {
631
+ # "tag": "v2.1.1",
632
+ # "version": (2, 1, 1),
633
+ # "commit": "abc123...",
634
+ # "date": "2025-01-15T10:30:00",
635
+ # "message": "feat: Add confidence",
636
+ # "author": "alice@example.com"
637
+ # },
638
+ # ...
639
+ # ]
640
+ ```
641
+
642
+ **`load_schema(schema_name, version=None)`**
643
+ ```python
644
+ schema = git_svc.load_schema("cv-parser") # Latest
645
+ schema = git_svc.load_schema("cv-parser", version="v2.1.0") # Specific
646
+
647
+ # Returns: dict (parsed YAML)
648
+ ```
649
+
650
+ **`compare_schemas(schema_name, version1, version2, unified=3)`**
651
+ ```python
652
+ diff = git_svc.compare_schemas("cv-parser", "v2.0.0", "v2.1.0")
653
+
654
+ # Returns: str (unified diff format)
655
+ ```
656
+
657
+ **`has_breaking_changes(schema_name, version1, version2)`**
658
+ ```python
659
+ has_breaking = git_svc.has_breaking_changes("cv-parser", "v2.0.0", "v3.0.0")
660
+
661
+ # Returns: bool
662
+ ```
663
+
664
+ **`load_experiment(experiment_name, version=None)`**
665
+ ```python
666
+ exp = git_svc.load_experiment("hello-world", version="v1.0.0")
667
+
668
+ # Returns: dict (parsed YAML)
669
+ ```
670
+
671
+ **`sync()`**
672
+ ```python
673
+ git_svc.sync() # Clear cache, force fresh clone
674
+
675
+ # Returns: None
676
+ ```
677
+
678
+ **`get_commit(path, version)`**
679
+ ```python
680
+ commit = git_svc.get_commit("schemas/cv-parser.yaml", "v2.1.0")
681
+
682
+ # Returns: str (40-character commit hash)
683
+ ```
684
+
685
+ ### GitProvider
686
+
687
+ Low-level Git operations (usually accessed via FS).
688
+
689
+ ```python
690
+ from rem.services.fs.git_provider import GitProvider
691
+
692
+ provider = GitProvider(
693
+ repo_url="ssh://git@github.com/org/repo.git",
694
+ branch="main",
695
+ cache_dir="/tmp/rem-git-cache"
696
+ )
697
+ ```
698
+
699
+ #### Methods
700
+
701
+ **`exists(uri)`**
702
+ ```python
703
+ exists = provider.exists("git://schemas/cv-parser.yaml?ref=v2.1.0")
704
+ # Returns: bool
705
+ ```
706
+
707
+ **`read(uri, **options)`**
708
+ ```python
709
+ content = provider.read("git://schemas/cv-parser.yaml?ref=v2.1.0")
710
+ # Returns: Any (format-specific)
711
+ ```
712
+
713
+ **`ls(uri, **options)`**
714
+ ```python
715
+ files = provider.ls("git://schemas/?ref=v2.1.0")
716
+ # Returns: list[str]
717
+ ```
718
+
719
+ **`get_semantic_versions(file_path, pattern=None)`**
720
+ ```python
721
+ versions = provider.get_semantic_versions("schemas/cv-parser.yaml")
722
+ versions = provider.get_semantic_versions("schemas/cv-parser.yaml", pattern="v2\\..*")
723
+ # Returns: list[dict]
724
+ ```
725
+
726
+ **`diff_versions(file_path, version1, version2, unified=3)`**
727
+ ```python
728
+ diff = provider.diff_versions("schemas/cv-parser.yaml", "v2.0.0", "v2.1.0")
729
+ # Returns: str
730
+ ```
731
+
732
+ **`clear_cache(ref=None)`**
733
+ ```python
734
+ provider.clear_cache("v2.1.0") # Clear specific version
735
+ provider.clear_cache() # Clear all versions
736
+ # Returns: None
737
+ ```
738
+
739
+ **`get_current_commit(ref=None)`**
740
+ ```python
741
+ commit = provider.get_current_commit("v2.1.0")
742
+ # Returns: str (40-character hash)
743
+ ```
744
+
745
+ ---
746
+
747
+ ## Security Best Practices
748
+
749
+ ### 1. Use Read-Only Deploy Keys
750
+
751
+ ```bash
752
+ # GitHub: Settings → Deploy keys
753
+ # ✓ Read access
754
+ # ✗ Write access
755
+
756
+ # GitLab: Settings → Repository → Deploy keys
757
+ # ✓ Read repository
758
+ # ✗ Write repository
759
+ ```
760
+
761
+ ### 2. Store Secrets in Kubernetes Secrets
762
+
763
+ ```bash
764
+ # ✅ GOOD: Kubernetes Secret
765
+ kubectl create secret generic git-creds \
766
+ --from-file=ssh=$HOME/.ssh/deploy_key
767
+
768
+ # ❌ BAD: Environment variable
769
+ export GIT__SSH_KEY="-----BEGIN OPENSSH PRIVATE KEY-----\n..."
770
+ ```
771
+
772
+ ### 3. Enable Known Hosts Verification
773
+
774
+ ```bash
775
+ # Generate known_hosts
776
+ ssh-keyscan github.com >> ~/.ssh/known_hosts
777
+
778
+ # Configure REM
779
+ export GIT__KNOWN_HOSTS_PATH="$HOME/.ssh/known_hosts"
780
+
781
+ # This prevents MITM attacks
782
+ ```
783
+
784
+ ### 4. Rotate PATs Regularly
785
+
786
+ ```bash
787
+ # GitHub: Set expiration to 90 days
788
+ # GitLab: Set expiration to 90 days
789
+
790
+ # Rotate before expiration
791
+ # Update Kubernetes Secret
792
+ kubectl create secret generic git-creds \
793
+ --from-literal=token=ghp_NEW_TOKEN \
794
+ --dry-run=client -o yaml | kubectl apply -f -
795
+ ```
796
+
797
+ ### 5. Use Least Privilege
798
+
799
+ ```bash
800
+ # GitHub Fine-grained PAT:
801
+ # Permissions → Contents → Read-only ✓
802
+ # Permissions → Contents → Read and write ✗
803
+
804
+ # SSH Deploy Key:
805
+ # Read access ✓
806
+ # Write access ✗
807
+ ```
808
+
809
+ ### 6. Audit Access
810
+
811
+ ```bash
812
+ # Monitor Git access logs
813
+ kubectl logs -l app=rem-api | grep "Git"
814
+
815
+ # GitHub/GitLab audit logs
816
+ # Settings → Security → Audit log
817
+ ```
818
+
819
+ ---
820
+
821
+ ## Performance & Caching
822
+
823
+ ### Cache Hit Rates
824
+
825
+ ```
826
+ Typical Performance:
827
+ - First clone: 1-10 seconds (depends on repo size)
828
+ - Cached read: <10ms (local filesystem)
829
+ - Shallow clone: 90% size reduction
830
+
831
+ Cache Efficiency:
832
+ - Same version, multiple reads: 100% cache hit
833
+ - Different versions: Separate cache entries
834
+ - Branch updates: Manual sync required
835
+ ```
836
+
837
+ ### Shallow Clones
838
+
839
+ ```bash
840
+ # Enable shallow clones (default)
841
+ export GIT__SHALLOW_CLONE=true
842
+
843
+ # Benefits:
844
+ # - Faster clone (only latest commit)
845
+ # - Less disk space (no history)
846
+ # - Recommended for production
847
+
848
+ # Disable for full history
849
+ export GIT__SHALLOW_CLONE=false
850
+ ```
851
+
852
+ ### Cache Management
853
+
854
+ ```python
855
+ from rem.services.git import GitService
856
+
857
+ git_svc = GitService()
858
+
859
+ # Manual sync (clear cache, pull latest)
860
+ git_svc.sync()
861
+
862
+ # Periodic sync (configure the interval with an environment variable, set in the shell):
862
+ # export GIT__SYNC_INTERVAL=300 # 5 minutes
864
+ ```
865
+
866
+ ### Monitoring
867
+
868
+ ```python
869
+ import os
870
+ from pathlib import Path
871
+
872
+ cache_dir = Path(os.environ.get("GIT__CACHE_DIR", "/tmp/rem-git-cache"))
873
+
874
+ # Check cache size
875
+ def get_cache_size():
876
+ total = sum(
877
+ f.stat().st_size
878
+ for f in cache_dir.rglob("*")
879
+ if f.is_file()
880
+ )
881
+ return total / (1024 ** 2) # MB
882
+
883
+ print(f"Git cache size: {get_cache_size():.2f} MB")
884
+
885
+ # List cached repos
886
+ for repo_dir in cache_dir.iterdir():
887
+ if repo_dir.is_dir():
888
+ print(f"Repo: {repo_dir.name}")
889
+ for ref_dir in repo_dir.iterdir():
890
+ if ref_dir.is_dir():
891
+ print(f" - {ref_dir.name}")
892
+ ```
893
+
894
+ ---
895
+
896
+ ## Troubleshooting
897
+
898
+ ### SSH Key Not Found
899
+
900
+ **Error**:
901
+ ```
902
+ FileNotFoundError: SSH key not found at /etc/git-secret/ssh
903
+ ```
904
+
905
+ **Solution**:
906
+ ```bash
907
+ # Check if secret is mounted
908
+ kubectl describe pod rem-api-xxx | grep git-secret
909
+
910
+ # Verify secret exists
911
+ kubectl get secret git-creds -n rem-app
912
+
913
+ # Check file permissions
914
+ kubectl exec rem-api-xxx -- ls -la /etc/git-secret/
915
+
916
+ # Expected:
917
+ # -r-------- 1 rem rem 464 Jan 15 10:30 ssh
918
+ # -r-------- 1 rem rem 444 Jan 15 10:30 known_hosts
919
+ ```
920
+
921
+ ### Authentication Failed
922
+
923
+ **Error**:
924
+ ```
925
+ GitCommandError: Permission denied (publickey)
926
+ ```
927
+
928
+ **Solution**:
929
+ ```bash
930
+ # Test SSH key locally
931
+ ssh -i /path/to/key git@github.com
932
+
933
+ # Check deploy key in GitHub
934
+ # Settings → Deploy keys → Verify key is added
935
+
936
+ # Verify known_hosts contains host
937
+ grep github.com ~/.ssh/known_hosts
938
+
939
+ # Refresh the github.com entry if needed (keeps entries for other hosts)
939
+ ssh-keygen -R github.com && ssh-keyscan github.com >> ~/.ssh/known_hosts
941
+ ```
942
+
943
+ ### Rate Limit Exceeded (HTTPS)
944
+
945
+ **Error**:
946
+ ```
947
+ API rate limit exceeded for user
948
+ ```
949
+
950
+ **Solution**:
951
+ ```bash
952
+ # Switch to SSH authentication
953
+ export GIT__DEFAULT_REPO_URL="ssh://git@github.com/org/repo.git"
954
+
955
+ # Or authenticate HTTPS requests with a personal access token (authenticated requests get higher limits)
956
+ export GIT__PERSONAL_ACCESS_TOKEN="ghp_..."
957
+ ```
958
+
959
+ ### Repo Clone Timeout
960
+
961
+ **Error**:
962
+ ```
963
+ GitCommandError: timeout after 60s
964
+ ```
965
+
966
+ **Solution**:
967
+ ```bash
968
+ # Enable shallow clone
969
+ export GIT__SHALLOW_CLONE=true
970
+
971
+ # Or enlarge Git's HTTP POST buffer (note: this is a buffer size in bytes, not a timeout)
972
+ git config --global http.postBuffer 524288000
973
+
974
+ # Check network connectivity
975
+ kubectl exec rem-api-xxx -- ping github.com
976
+ ```
977
+
978
+ ### Cache Corruption
979
+
980
+ **Error**:
981
+ ```
982
+ InvalidGitRepositoryError: /tmp/rem-git-cache/xxx is not a git repository
983
+ ```
984
+
985
+ **Solution**:
986
+ ```python
987
+ from rem.services.git_service import GitService
988
+
989
+ git_svc = GitService()
990
+
991
+ # Clear corrupted cache
992
+ git_svc.sync()
993
+
994
+ # Or manually delete the cache (run in a shell, not in Python):
995
+ # rm -rf /tmp/rem-git-cache/*
996
+ ```
997
+
998
+ ### File Not Found at Version
999
+
1000
+ **Error**:
1001
+ ```
1002
+ FileNotFoundError: Path 'schemas/agent.yaml' not found at ref 'v2.1.0'
1003
+ ```
1004
+
1005
+ **Solution**:
1006
+ ```python
1007
+ from rem.services.git_service import GitService
1008
+
1009
+ git_svc = GitService()
1010
+
1011
+ # List available versions
1012
+ versions = git_svc.list_schema_versions("agent")
1013
+
1014
+ # Check if file exists at tag
1015
+ # File may have been renamed or moved
1016
+ ```
1017
+
1018
+ ### Known Hosts Verification Failed
1019
+
1020
+ **Error**:
1021
+ ```
1022
+ Host key verification failed
1023
+ ```
1024
+
1025
+ **Solution**:
1026
+ ```bash
1027
+ # Add host to known_hosts
1028
+ ssh-keyscan github.com >> ~/.ssh/known_hosts
1029
+
1030
+ # Update Kubernetes Secret
1031
+ kubectl create secret generic git-creds \
1032
+ --from-file=ssh=$HOME/.ssh/deploy_key \
1033
+ --from-file=known_hosts=$HOME/.ssh/known_hosts \
1034
+ --dry-run=client -o yaml | kubectl apply -f -
1035
+
1036
+ # Restart pods
1037
+ kubectl rollout restart deployment/rem-api -n rem-app
1038
+ ```
1039
+
1040
+ ---
1041
+
1042
+ ## Environment Variables Reference
1043
+
1044
+ | Variable | Default | Description |
1045
+ |----------|---------|-------------|
1046
+ | `GIT__ENABLED` | `false` | Enable Git provider |
1047
+ | `GIT__DEFAULT_REPO_URL` | `None` | Git repository URL (ssh:// or https://) |
1048
+ | `GIT__DEFAULT_BRANCH` | `main` | Default branch to clone |
1049
+ | `GIT__SSH_KEY_PATH` | `/etc/git-secret/ssh` | Path to SSH private key |
1050
+ | `GIT__KNOWN_HOSTS_PATH` | `/etc/git-secret/known_hosts` | Path to known_hosts file |
1051
+ | `GIT__PERSONAL_ACCESS_TOKEN` | `None` | PAT for HTTPS authentication |
1052
+ | `GIT__CACHE_DIR` | `/tmp/rem-git-cache` | Local cache directory |
1053
+ | `GIT__SHALLOW_CLONE` | `true` | Use shallow clone (--depth=1) |
1054
+ | `GIT__VERIFY_SSL` | `true` | Verify SSL certificates |
1055
+ | `GIT__SYNC_INTERVAL` | `300` | Sync interval in seconds |
1056
+
1057
+ ---
1058
+
1059
+ ## Additional Resources
1060
+
1061
+ - [GitPython Documentation](https://gitpython.readthedocs.io/)
1062
+ - [Semantic Versioning Spec](https://semver.org/)
1063
+ - [Kubernetes Secrets](https://kubernetes.io/docs/concepts/configuration/secret/)
1064
+ - [GitHub Deploy Keys](https://docs.github.com/en/developers/overview/managing-deploy-keys)
1065
+ - [GitLab Deploy Keys](https://docs.gitlab.com/ee/user/project/deploy_keys/)
1066
+ - [git-sync Sidecar](https://github.com/kubernetes/git-sync)
1067
+
1068
+ ---
1069
+
1070
+ ## Support
1071
+
1072
+ For issues or questions:
1073
+ 1. Check the [Troubleshooting](#troubleshooting) section
1074
+ 2. Review logs: `kubectl logs -l app=rem-api | grep Git`
1075
+ 3. Open issue: https://github.com/your-org/rem/issues