remdb 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +566 -0
- rem/cli/commands/configure.py +497 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1302 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +96 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +676 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +336 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.3.0.dist-info/METADATA +1455 -0
- remdb-0.3.0.dist-info/RECORD +187 -0
- remdb-0.3.0.dist-info/WHEEL +4 -0
- remdb-0.3.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,1075 @@
|
|
|
1
|
+
# Git Provider for Versioned Schema & Experiment Syncing
|
|
2
|
+
|
|
3
|
+
REM's Git provider enables syncing of agent schemas, evaluators, and experiments from Git repositories with full semantic versioning support. It is designed for Kubernetes cluster environments with proper secret management.
|
|
4
|
+
|
|
5
|
+
## Table of Contents
|
|
6
|
+
|
|
7
|
+
- [Quick Start](#quick-start)
|
|
8
|
+
- [Architecture](#architecture)
|
|
9
|
+
- [Authentication](#authentication)
|
|
10
|
+
- [URI Format](#uri-format)
|
|
11
|
+
- [Semantic Versioning](#semantic-versioning)
|
|
12
|
+
- [Kubernetes Deployment](#kubernetes-deployment)
|
|
13
|
+
- [Use Cases](#use-cases)
|
|
14
|
+
- [API Reference](#api-reference)
|
|
15
|
+
- [Security Best Practices](#security-best-practices)
|
|
16
|
+
- [Performance & Caching](#performance--caching)
|
|
17
|
+
- [Troubleshooting](#troubleshooting)
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Quick Start
|
|
22
|
+
|
|
23
|
+
### Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Add GitPython dependency
|
|
27
|
+
cd rem
|
|
28
|
+
uv add GitPython
|
|
29
|
+
|
|
30
|
+
# Or with pip
|
|
31
|
+
pip install GitPython
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Configuration
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# Enable Git provider
|
|
38
|
+
export GIT__ENABLED=true
|
|
39
|
+
export GIT__DEFAULT_REPO_URL="ssh://git@github.com/my-org/my-repo.git"
|
|
40
|
+
|
|
41
|
+
# Optional: Configure cache and SSH paths
|
|
42
|
+
export GIT__CACHE_DIR="/tmp/rem-git-cache"
|
|
43
|
+
export GIT__SSH_KEY_PATH="/etc/git-secret/ssh"
|
|
44
|
+
export GIT__KNOWN_HOSTS_PATH="/etc/git-secret/known_hosts"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Basic Usage
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from rem.services.fs import FS
|
|
51
|
+
from rem.services.git import GitService
|
|
52
|
+
|
|
53
|
+
# Filesystem interface (low-level)
|
|
54
|
+
fs = FS()
|
|
55
|
+
schema = fs.read("git://schemas/cv-parser.yaml?ref=v2.1.0")
|
|
56
|
+
schemas = fs.ls("git://schemas/")
|
|
57
|
+
|
|
58
|
+
# GitService interface (high-level, recommended)
|
|
59
|
+
git_svc = GitService()
|
|
60
|
+
|
|
61
|
+
# List schema versions
|
|
62
|
+
versions = git_svc.list_schema_versions("cv-parser")
|
|
63
|
+
print(f"Latest: {versions[0]['tag']}") # v2.1.1
|
|
64
|
+
|
|
65
|
+
# Load specific version
|
|
66
|
+
schema = git_svc.load_schema("cv-parser", version="v2.1.0")
|
|
67
|
+
|
|
68
|
+
# Compare versions
|
|
69
|
+
diff = git_svc.compare_schemas("cv-parser", "v2.0.0", "v2.1.0")
|
|
70
|
+
print(diff)
|
|
71
|
+
|
|
72
|
+
# Check for breaking changes
|
|
73
|
+
if git_svc.has_breaking_changes("cv-parser", "v2.0.0", "v2.1.0"):
|
|
74
|
+
    print("⚠️ Manual migration required")
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Architecture
|
|
80
|
+
|
|
81
|
+
### Component Overview
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
GitService (High-level semantic operations)
|
|
85
|
+
↓
|
|
86
|
+
FS.git_provider (Thin wrapper for FS interface)
|
|
87
|
+
↓
|
|
88
|
+
GitProvider (Git operations with caching)
|
|
89
|
+
↓
|
|
90
|
+
GitPython (Git CLI wrapper)
|
|
91
|
+
↓
|
|
92
|
+
Git CLI (System git command)
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Path Conventions
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
Repository Structure:
|
|
99
|
+
repo/
|
|
100
|
+
├── schemas/ # Agent schemas
|
|
101
|
+
│ ├── cv-parser.yaml # git://schemas/cv-parser.yaml
|
|
102
|
+
│ ├── contract-analyzer.yaml
|
|
103
|
+
│ └── evaluators/ # Evaluator schemas
|
|
104
|
+
│ ├── cv-correctness.yaml
|
|
105
|
+
│ └── contract-risk.yaml
|
|
106
|
+
└── experiments/ # Evaluation experiments
|
|
107
|
+
├── hello-world/
|
|
108
|
+
│ ├── config.yaml
|
|
109
|
+
│ └── ground_truth.csv
|
|
110
|
+
└── cv-parser-test/
|
|
111
|
+
├── config.yaml
|
|
112
|
+
└── resumes/
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Caching Strategy
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
Local Cache Structure:
|
|
119
|
+
/tmp/rem-git-cache/
|
|
120
|
+
└── {repo_hash}/ # SHA256 hash of repo URL
|
|
121
|
+
├── main/ # Default branch
|
|
122
|
+
│ ├── schemas/
|
|
123
|
+
│ └── experiments/
|
|
124
|
+
├── v2.1.0/ # Tag
|
|
125
|
+
│ └── schemas/
|
|
126
|
+
└── v2.1.1/ # Tag
|
|
127
|
+
└── schemas/
|
|
128
|
+
|
|
129
|
+
Cache Invalidation:
|
|
130
|
+
- Manual: git_svc.sync() or provider.clear_cache()
|
|
131
|
+
- Automatic: Configurable sync interval (default: 5 minutes)
|
|
132
|
+
- Per-ref: Cache cleared per tag/branch
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## Authentication
|
|
138
|
+
|
|
139
|
+
### Method 1: SSH Keys (Recommended for Production)
|
|
140
|
+
|
|
141
|
+
**Setup**:
|
|
142
|
+
```bash
|
|
143
|
+
# Generate SSH key (if needed)
|
|
144
|
+
ssh-keygen -t ed25519 -C "rem-cluster@example.com" -f ~/.ssh/rem_deploy_key
|
|
145
|
+
|
|
146
|
+
# Add public key as deploy key in GitHub/GitLab
|
|
147
|
+
# Settings → Deploy keys → Add deploy key
|
|
148
|
+
# ✓ Read-only access
|
|
149
|
+
# ✗ Write access (not needed)
|
|
150
|
+
|
|
151
|
+
# Configure REM
|
|
152
|
+
export GIT__SSH_KEY_PATH="$HOME/.ssh/rem_deploy_key"
|
|
153
|
+
export GIT__KNOWN_HOSTS_PATH="$HOME/.ssh/known_hosts"
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
**Advantages**:
|
|
157
|
+
- ✅ No rate limits
|
|
158
|
+
- ✅ Full Git protocol support
|
|
159
|
+
- ✅ Works with private repos
|
|
160
|
+
- ✅ More secure (no token in environment)
|
|
161
|
+
|
|
162
|
+
**Known Hosts Setup**:
|
|
163
|
+
```bash
|
|
164
|
+
# Add GitHub to known_hosts
|
|
165
|
+
ssh-keyscan github.com >> ~/.ssh/known_hosts
|
|
166
|
+
|
|
167
|
+
# Add GitLab to known_hosts
|
|
168
|
+
ssh-keyscan gitlab.com >> ~/.ssh/known_hosts
|
|
169
|
+
|
|
170
|
+
# Add self-hosted Git server
|
|
171
|
+
ssh-keyscan git.example.com >> ~/.ssh/known_hosts
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### Method 2: HTTPS with Personal Access Token
|
|
175
|
+
|
|
176
|
+
**Setup**:
|
|
177
|
+
```bash
|
|
178
|
+
# Create PAT in GitHub/GitLab
|
|
179
|
+
# GitHub: Settings → Developer settings → Personal access tokens → Fine-grained tokens
|
|
180
|
+
# Permissions: Contents (read-only)
|
|
181
|
+
|
|
182
|
+
export GIT__PERSONAL_ACCESS_TOKEN="ghp_xxxxxxxxxxxxxxxxxxxx"
|
|
183
|
+
export GIT__DEFAULT_REPO_URL="https://github.com/my-org/my-repo.git"
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
**Rate Limits**:
|
|
187
|
+
- GitHub: 5,000 API requests/hour per authenticated user
|
|
188
|
+
- GitLab: 2,000 API requests/hour per user
|
|
189
|
+
- Bitbucket: 1,000 API requests/hour per user
|
|
190
|
+
|
|
191
|
+
**Advantages**:
|
|
192
|
+
- ✅ Easier local development setup
|
|
193
|
+
- ✅ Works with corporate proxies
|
|
194
|
+
- ✅ Fine-grained permissions (GitHub)
|
|
195
|
+
|
|
196
|
+
**Disadvantages**:
|
|
197
|
+
- ❌ Rate limits apply
|
|
198
|
+
- ❌ Token in environment variable
|
|
199
|
+
- ❌ Token rotation required
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
## URI Format
|
|
204
|
+
|
|
205
|
+
### Syntax
|
|
206
|
+
|
|
207
|
+
```
|
|
208
|
+
git://{path}[?ref={version}]
|
|
209
|
+
|
|
210
|
+
Where:
|
|
211
|
+
- path: Path within repository (e.g., "schemas/cv-parser.yaml")
|
|
212
|
+
- ref: Optional Git reference (branch, tag, or commit hash)
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Examples
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
# Read from default branch (main)
|
|
219
|
+
fs.read("git://schemas/cv-parser.yaml")
|
|
220
|
+
|
|
221
|
+
# Read from specific tag
|
|
222
|
+
fs.read("git://schemas/cv-parser.yaml?ref=v2.1.0")
|
|
223
|
+
|
|
224
|
+
# Read from branch
|
|
225
|
+
fs.read("git://schemas/cv-parser.yaml?ref=feature-branch")
|
|
226
|
+
|
|
227
|
+
# Read from commit hash
|
|
228
|
+
fs.read("git://schemas/cv-parser.yaml?ref=abc123def456")
|
|
229
|
+
|
|
230
|
+
# List directory
|
|
231
|
+
fs.ls("git://schemas/")
|
|
232
|
+
fs.ls("git://experiments/hello-world/?ref=v1.0.0")
|
|
233
|
+
|
|
234
|
+
# Check existence
|
|
235
|
+
fs.exists("git://schemas/cv-parser.yaml?ref=v2.1.0")
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
## Semantic Versioning
|
|
241
|
+
|
|
242
|
+
### Version Tracking
|
|
243
|
+
|
|
244
|
+
REM follows [Semantic Versioning 2.0.0](https://semver.org/):
|
|
245
|
+
|
|
246
|
+
```
|
|
247
|
+
MAJOR.MINOR.PATCH
|
|
248
|
+
|
|
249
|
+
Examples:
|
|
250
|
+
- v2.1.0 → v2.1.1: PATCH (bug fix, backwards compatible)
|
|
251
|
+
- v2.1.1 → v2.2.0: MINOR (new feature, backwards compatible)
|
|
252
|
+
- v2.2.0 → v3.0.0: MAJOR (breaking change, not backwards compatible)
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
### Get Version History
|
|
256
|
+
|
|
257
|
+
```python
|
|
258
|
+
from rem.services.git import GitService
|
|
259
|
+
|
|
260
|
+
git_svc = GitService()
|
|
261
|
+
|
|
262
|
+
# Get all versions
|
|
263
|
+
versions = git_svc.list_schema_versions("cv-parser")
|
|
264
|
+
|
|
265
|
+
for v in versions:
|
|
266
|
+
    print(f"{v['tag']}: {v['message']} by {v['author']} on {v['date']}")
|
|
267
|
+
|
|
268
|
+
# Output:
|
|
269
|
+
# v2.1.1: feat: Add confidence scoring by alice@example.com on 2025-01-15T10:30:00
|
|
270
|
+
# v2.1.0: feat: Add multi-language support by bob@example.com on 2025-01-10T14:20:00
|
|
271
|
+
# v2.0.0: feat!: Redesign output schema by alice@example.com on 2025-01-05T09:00:00
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
### Filter by Version Pattern
|
|
275
|
+
|
|
276
|
+
```python
|
|
277
|
+
# Get only v2.x.x versions
|
|
278
|
+
v2_versions = git_svc.list_schema_versions("cv-parser", pattern="v2\\..*")
|
|
279
|
+
|
|
280
|
+
# Get only v2.1.x versions
|
|
281
|
+
v2_1_versions = git_svc.list_schema_versions("cv-parser", pattern="v2\\.1\\..*")
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
### Compare Versions
|
|
285
|
+
|
|
286
|
+
```python
|
|
287
|
+
# Get diff between versions
|
|
288
|
+
diff = git_svc.compare_schemas("cv-parser", "v2.1.0", "v2.1.1")
|
|
289
|
+
print(diff)
|
|
290
|
+
|
|
291
|
+
# Output:
|
|
292
|
+
# --- a/schemas/cv-parser.yaml
|
|
293
|
+
# +++ b/schemas/cv-parser.yaml
|
|
294
|
+
# @@ -10,6 +10,8 @@
|
|
295
|
+
# skills:
|
|
296
|
+
# type: array
|
|
297
|
+
# + items:
|
|
298
|
+
# + type: string
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### Breaking Change Detection
|
|
302
|
+
|
|
303
|
+
```python
|
|
304
|
+
# Check for breaking changes
|
|
305
|
+
has_breaking = git_svc.has_breaking_changes("cv-parser", "v2.1.0", "v3.0.0")
|
|
306
|
+
|
|
307
|
+
if has_breaking:
|
|
308
|
+
    print("⚠️ Breaking changes detected!")
|
|
309
|
+
    print("Manual migration required.")
|
|
310
|
+
|
|
311
|
+
# Show diff
|
|
312
|
+
diff = git_svc.compare_schemas("cv-parser", "v2.1.0", "v3.0.0")
|
|
313
|
+
print(diff)
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
---
|
|
317
|
+
|
|
318
|
+
## Kubernetes Deployment
|
|
319
|
+
|
|
320
|
+
### Secret Creation
|
|
321
|
+
|
|
322
|
+
```bash
|
|
323
|
+
# Create Kubernetes Secret with SSH key
|
|
324
|
+
kubectl create secret generic git-creds \
|
|
325
|
+
--from-file=ssh=$HOME/.ssh/rem_deploy_key \
|
|
326
|
+
--from-file=known_hosts=$HOME/.ssh/known_hosts \
|
|
327
|
+
--namespace rem-app
|
|
328
|
+
|
|
329
|
+
# Verify secret
|
|
330
|
+
kubectl get secret git-creds -n rem-app -o yaml
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
### Deployment YAML
|
|
334
|
+
|
|
335
|
+
```yaml
|
|
336
|
+
apiVersion: apps/v1
|
|
337
|
+
kind: Deployment
|
|
338
|
+
metadata:
|
|
339
|
+
name: rem-api
|
|
340
|
+
namespace: rem-app
|
|
341
|
+
spec:
|
|
342
|
+
replicas: 2
|
|
343
|
+
selector:
|
|
344
|
+
matchLabels:
|
|
345
|
+
app: rem-api
|
|
346
|
+
template:
|
|
347
|
+
metadata:
|
|
348
|
+
labels:
|
|
349
|
+
app: rem-api
|
|
350
|
+
spec:
|
|
351
|
+
# Security context for SSH key permissions
|
|
352
|
+
securityContext:
|
|
353
|
+
fsGroup: 65533 # git user group
|
|
354
|
+
|
|
355
|
+
volumes:
|
|
356
|
+
# Mount Git credentials from Secret
|
|
357
|
+
- name: git-secret
|
|
358
|
+
secret:
|
|
359
|
+
secretName: git-creds
|
|
360
|
+
defaultMode: 0400 # Read-only for owner
|
|
361
|
+
|
|
362
|
+
containers:
|
|
363
|
+
- name: rem-api
|
|
364
|
+
image: percolationlabs/rem:latest
|
|
365
|
+
|
|
366
|
+
env:
|
|
367
|
+
# Enable Git provider
|
|
368
|
+
- name: GIT__ENABLED
|
|
369
|
+
value: "true"
|
|
370
|
+
- name: GIT__DEFAULT_REPO_URL
|
|
371
|
+
value: "ssh://git@github.com/my-org/my-repo.git"
|
|
372
|
+
- name: GIT__SSH_KEY_PATH
|
|
373
|
+
value: "/etc/git-secret/ssh"
|
|
374
|
+
- name: GIT__KNOWN_HOSTS_PATH
|
|
375
|
+
value: "/etc/git-secret/known_hosts"
|
|
376
|
+
- name: GIT__CACHE_DIR
|
|
377
|
+
value: "/app/git-cache"
|
|
378
|
+
- name: GIT__SHALLOW_CLONE
|
|
379
|
+
value: "true"
|
|
380
|
+
|
|
381
|
+
volumeMounts:
|
|
382
|
+
# Mount Git credentials
|
|
383
|
+
- name: git-secret
|
|
384
|
+
mountPath: /etc/git-secret
|
|
385
|
+
readOnly: true
|
|
386
|
+
|
|
387
|
+
resources:
|
|
388
|
+
requests:
|
|
389
|
+
memory: "256Mi"
|
|
390
|
+
cpu: "100m"
|
|
391
|
+
limits:
|
|
392
|
+
memory: "512Mi"
|
|
393
|
+
cpu: "500m"
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
### Git-Sync Sidecar Pattern (Alternative)
|
|
397
|
+
|
|
398
|
+
```yaml
|
|
399
|
+
apiVersion: apps/v1
|
|
400
|
+
kind: Deployment
|
|
401
|
+
metadata:
|
|
402
|
+
name: rem-api-with-git-sync
|
|
403
|
+
spec:
|
|
404
|
+
template:
|
|
405
|
+
spec:
|
|
406
|
+
volumes:
|
|
407
|
+
- name: git-secret
|
|
408
|
+
secret:
|
|
409
|
+
secretName: git-creds
|
|
410
|
+
- name: git-repo
|
|
411
|
+
emptyDir: {}
|
|
412
|
+
|
|
413
|
+
containers:
|
|
414
|
+
# Main application container
|
|
415
|
+
- name: rem-api
|
|
416
|
+
image: percolationlabs/rem:latest
|
|
417
|
+
env:
|
|
418
|
+
- name: GIT__ENABLED
|
|
419
|
+
value: "false" # Use git-sync instead
|
|
420
|
+
volumeMounts:
|
|
421
|
+
- name: git-repo
|
|
422
|
+
mountPath: /app/git-repo
|
|
423
|
+
readOnly: true
|
|
424
|
+
|
|
425
|
+
# Git-sync sidecar (keeps repo in sync)
|
|
426
|
+
- name: git-sync
|
|
427
|
+
image: registry.k8s.io/git-sync/git-sync:v4.0.0
|
|
428
|
+
env:
|
|
429
|
+
- name: GITSYNC_REPO
|
|
430
|
+
value: "ssh://git@github.com/my-org/my-repo.git"
|
|
431
|
+
- name: GITSYNC_ROOT
|
|
432
|
+
value: "/git"
|
|
433
|
+
- name: GITSYNC_LINK
|
|
434
|
+
value: "repo"
|
|
435
|
+
- name: GITSYNC_PERIOD
|
|
436
|
+
value: "30s" # Sync every 30 seconds
|
|
437
|
+
- name: GITSYNC_SSH_KEY_FILE
|
|
438
|
+
value: "/etc/git-secret/ssh"
|
|
439
|
+
volumeMounts:
|
|
440
|
+
- name: git-repo
|
|
441
|
+
mountPath: /git
|
|
442
|
+
- name: git-secret
|
|
443
|
+
mountPath: /etc/git-secret
|
|
444
|
+
readOnly: true
|
|
445
|
+
```
|
|
446
|
+
|
|
447
|
+
---
|
|
448
|
+
|
|
449
|
+
## Use Cases
|
|
450
|
+
|
|
451
|
+
### 1. Schema Versioning
|
|
452
|
+
|
|
453
|
+
**Problem**: Need to track agent schema evolution and compare versions.
|
|
454
|
+
|
|
455
|
+
```python
|
|
456
|
+
from rem.services.git import GitService
|
|
457
|
+
|
|
458
|
+
git_svc = GitService()
|
|
459
|
+
|
|
460
|
+
# Production uses v2.1.0
|
|
461
|
+
prod_schema = git_svc.load_schema("cv-parser", version="v2.1.0")
|
|
462
|
+
|
|
463
|
+
# Staging tests v2.1.1
|
|
464
|
+
staging_schema = git_svc.load_schema("cv-parser", version="v2.1.1")
|
|
465
|
+
|
|
466
|
+
# Compare to see what changed
|
|
467
|
+
diff = git_svc.compare_schemas("cv-parser", "v2.1.0", "v2.1.1")
|
|
468
|
+
print("Changes in staging:")
|
|
469
|
+
print(diff)
|
|
470
|
+
```
|
|
471
|
+
|
|
472
|
+
### 2. Reproducible Evaluations
|
|
473
|
+
|
|
474
|
+
**Problem**: Need to ensure evaluations use the exact same schema version.
|
|
475
|
+
|
|
476
|
+
```python
|
|
477
|
+
from datetime import datetime
from rem.services.git import GitService
|
|
478
|
+
from rem.agentic.factory import create_pydantic_ai_agent
|
|
479
|
+
|
|
480
|
+
git_svc = GitService()
|
|
481
|
+
|
|
482
|
+
# Load pinned versions
|
|
483
|
+
schema = git_svc.load_schema("cv-parser", version="v2.1.0")
|
|
484
|
+
experiment = git_svc.load_experiment("cv-eval", version="v1.0.0")
|
|
485
|
+
|
|
486
|
+
# Create agent from versioned schema
|
|
487
|
+
agent = create_pydantic_ai_agent(schema)
|
|
488
|
+
|
|
489
|
+
# Log exact versions for reproducibility
|
|
490
|
+
metadata = {
|
|
491
|
+
"schema_version": "v2.1.0",
|
|
492
|
+
"schema_commit": git_svc.get_commit("schemas/cv-parser.yaml", "v2.1.0"),
|
|
493
|
+
"experiment_version": "v1.0.0",
|
|
494
|
+
"timestamp": datetime.now().isoformat()
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
# Run evaluation
|
|
498
|
+
result = await agent.run(experiment["test_input"])
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
### 3. Multi-Tenant Schema Management
|
|
502
|
+
|
|
503
|
+
**Problem**: Different tenants need different schema versions.
|
|
504
|
+
|
|
505
|
+
```python
|
|
506
|
+
from rem.services.git import GitService
|
|
507
|
+
|
|
508
|
+
git_svc = GitService()
|
|
509
|
+
|
|
510
|
+
# Tenant configuration
|
|
511
|
+
TENANT_SCHEMA_VERSIONS = {
|
|
512
|
+
"acme-corp": "v2.0.0", # Conservative, stable
|
|
513
|
+
"beta-corp": "v2.1.0", # Early adopter
|
|
514
|
+
"enterprise-corp": "v1.5.0", # Custom version
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
def get_tenant_schema(tenant_id: str):
|
|
518
|
+
    version = TENANT_SCHEMA_VERSIONS.get(tenant_id, "v2.1.0") # Default latest
|
|
519
|
+
    return git_svc.load_schema("cv-parser", version=version)
|
|
520
|
+
|
|
521
|
+
# Load tenant-specific schema
|
|
522
|
+
acme_schema = get_tenant_schema("acme-corp") # Gets v2.0.0
|
|
523
|
+
beta_schema = get_tenant_schema("beta-corp") # Gets v2.1.0
|
|
524
|
+
```
|
|
525
|
+
|
|
526
|
+
### 4. Migration Planning
|
|
527
|
+
|
|
528
|
+
**Problem**: Need to understand the impact of upgrading to a new schema version.
|
|
529
|
+
|
|
530
|
+
```python
|
|
531
|
+
from rem.services.git import GitService
|
|
532
|
+
|
|
533
|
+
git_svc = GitService()
|
|
534
|
+
|
|
535
|
+
current_version = "v2.1.0"
|
|
536
|
+
target_version = "v3.0.0"
|
|
537
|
+
|
|
538
|
+
# Check for breaking changes
|
|
539
|
+
if git_svc.has_breaking_changes("cv-parser", current_version, target_version):
|
|
540
|
+
print(f"⚠️ Breaking changes in {target_version}")
|
|
541
|
+
|
|
542
|
+
# Get detailed diff
|
|
543
|
+
diff = git_svc.compare_schemas("cv-parser", current_version, target_version)
|
|
544
|
+
|
|
545
|
+
# Analyze diff for specific patterns
|
|
546
|
+
if "- required:" in diff:
|
|
547
|
+
print("❌ Required fields removed")
|
|
548
|
+
if "- type:" in diff:
|
|
549
|
+
print("❌ Field types changed")
|
|
550
|
+
|
|
551
|
+
print("\nFull diff:")
|
|
552
|
+
print(diff)
|
|
553
|
+
|
|
554
|
+
# Create migration plan
|
|
555
|
+
print("\nMigration steps:")
|
|
556
|
+
print("1. Update agent to handle new schema")
|
|
557
|
+
print("2. Test with sample data")
|
|
558
|
+
print("3. Deploy to staging")
|
|
559
|
+
print("4. Gradual rollout to production")
|
|
560
|
+
else:
|
|
561
|
+
print(f"✅ No breaking changes in {target_version}")
|
|
562
|
+
print("Safe to upgrade")
|
|
563
|
+
```
|
|
564
|
+
|
|
565
|
+
### 5. Cluster Jobs with Versioned Schemas
|
|
566
|
+
|
|
567
|
+
**Problem**: Kubernetes jobs need to pull specific schema versions.
|
|
568
|
+
|
|
569
|
+
```yaml
|
|
570
|
+
# evaluation-job.yaml
|
|
571
|
+
apiVersion: batch/v1
|
|
572
|
+
kind: Job
|
|
573
|
+
metadata:
|
|
574
|
+
name: cv-parser-eval-v2-1-0
|
|
575
|
+
spec:
|
|
576
|
+
template:
|
|
577
|
+
spec:
|
|
578
|
+
restartPolicy: Never
|
|
579
|
+
volumes:
|
|
580
|
+
- name: git-secret
|
|
581
|
+
secret:
|
|
582
|
+
secretName: git-creds
|
|
583
|
+
containers:
|
|
584
|
+
- name: eval-runner
|
|
585
|
+
image: percolationlabs/rem:latest
|
|
586
|
+
command: ["python", "-m", "rem.cli.eval"]
|
|
587
|
+
args:
|
|
588
|
+
- "run"
|
|
589
|
+
- "--schema=cv-parser"
|
|
590
|
+
- "--version=v2.1.0"
|
|
591
|
+
- "--experiment=cv-eval"
|
|
592
|
+
- "--experiment-version=v1.0.0"
|
|
593
|
+
env:
|
|
594
|
+
- name: GIT__ENABLED
|
|
595
|
+
value: "true"
|
|
596
|
+
- name: GIT__DEFAULT_REPO_URL
|
|
597
|
+
value: "ssh://git@github.com/my-org/schemas.git"
|
|
598
|
+
volumeMounts:
|
|
599
|
+
- name: git-secret
|
|
600
|
+
mountPath: /etc/git-secret
|
|
601
|
+
```
|
|
602
|
+
|
|
603
|
+
---
|
|
604
|
+
|
|
605
|
+
## API Reference
|
|
606
|
+
|
|
607
|
+
### GitService
|
|
608
|
+
|
|
609
|
+
High-level semantic operations.
|
|
610
|
+
|
|
611
|
+
```python
|
|
612
|
+
from rem.services.git import GitService
|
|
613
|
+
|
|
614
|
+
git_svc = GitService(
|
|
615
|
+
fs=None, # FS instance (creates new if None)
|
|
616
|
+
schemas_dir="schemas", # Schemas directory in repo
|
|
617
|
+
experiments_dir="experiments" # Experiments directory
|
|
618
|
+
)
|
|
619
|
+
```
|
|
620
|
+
|
|
621
|
+
#### Methods
|
|
622
|
+
|
|
623
|
+
**`list_schema_versions(schema_name, pattern=None)`**
|
|
624
|
+
```python
|
|
625
|
+
versions = git_svc.list_schema_versions("cv-parser")
|
|
626
|
+
versions = git_svc.list_schema_versions("cv-parser", pattern="v2\\..*")
|
|
627
|
+
|
|
628
|
+
# Returns: list[dict]
|
|
629
|
+
# [
|
|
630
|
+
# {
|
|
631
|
+
# "tag": "v2.1.1",
|
|
632
|
+
# "version": (2, 1, 1),
|
|
633
|
+
# "commit": "abc123...",
|
|
634
|
+
# "date": "2025-01-15T10:30:00",
|
|
635
|
+
# "message": "feat: Add confidence",
|
|
636
|
+
# "author": "alice@example.com"
|
|
637
|
+
# },
|
|
638
|
+
# ...
|
|
639
|
+
# ]
|
|
640
|
+
```
|
|
641
|
+
|
|
642
|
+
**`load_schema(schema_name, version=None)`**
|
|
643
|
+
```python
|
|
644
|
+
schema = git_svc.load_schema("cv-parser") # Latest
|
|
645
|
+
schema = git_svc.load_schema("cv-parser", version="v2.1.0") # Specific
|
|
646
|
+
|
|
647
|
+
# Returns: dict (parsed YAML)
|
|
648
|
+
```
|
|
649
|
+
|
|
650
|
+
**`compare_schemas(schema_name, version1, version2, unified=3)`**
|
|
651
|
+
```python
|
|
652
|
+
diff = git_svc.compare_schemas("cv-parser", "v2.0.0", "v2.1.0")
|
|
653
|
+
|
|
654
|
+
# Returns: str (unified diff format)
|
|
655
|
+
```
|
|
656
|
+
|
|
657
|
+
**`has_breaking_changes(schema_name, version1, version2)`**
|
|
658
|
+
```python
|
|
659
|
+
has_breaking = git_svc.has_breaking_changes("cv-parser", "v2.0.0", "v3.0.0")
|
|
660
|
+
|
|
661
|
+
# Returns: bool
|
|
662
|
+
```
|
|
663
|
+
|
|
664
|
+
**`load_experiment(experiment_name, version=None)`**
|
|
665
|
+
```python
|
|
666
|
+
exp = git_svc.load_experiment("hello-world", version="v1.0.0")
|
|
667
|
+
|
|
668
|
+
# Returns: dict (parsed YAML)
|
|
669
|
+
```
|
|
670
|
+
|
|
671
|
+
**`sync()`**
|
|
672
|
+
```python
|
|
673
|
+
git_svc.sync() # Clear cache, force fresh clone
|
|
674
|
+
|
|
675
|
+
# Returns: None
|
|
676
|
+
```
|
|
677
|
+
|
|
678
|
+
**`get_commit(path, version)`**
|
|
679
|
+
```python
|
|
680
|
+
commit = git_svc.get_commit("schemas/cv-parser.yaml", "v2.1.0")
|
|
681
|
+
|
|
682
|
+
# Returns: str (40-character commit hash)
|
|
683
|
+
```
|
|
684
|
+
|
|
685
|
+
### GitProvider
|
|
686
|
+
|
|
687
|
+
Low-level Git operations (usually accessed via FS).
|
|
688
|
+
|
|
689
|
+
```python
|
|
690
|
+
from rem.services.fs.git_provider import GitProvider
|
|
691
|
+
|
|
692
|
+
provider = GitProvider(
|
|
693
|
+
repo_url="ssh://git@github.com/org/repo.git",
|
|
694
|
+
branch="main",
|
|
695
|
+
cache_dir="/tmp/rem-git-cache"
|
|
696
|
+
)
|
|
697
|
+
```
|
|
698
|
+
|
|
699
|
+
#### Methods
|
|
700
|
+
|
|
701
|
+
**`exists(uri)`**
|
|
702
|
+
```python
|
|
703
|
+
exists = provider.exists("git://schemas/cv-parser.yaml?ref=v2.1.0")
|
|
704
|
+
# Returns: bool
|
|
705
|
+
```
|
|
706
|
+
|
|
707
|
+
**`read(uri, **options)`**
|
|
708
|
+
```python
|
|
709
|
+
content = provider.read("git://schemas/cv-parser.yaml?ref=v2.1.0")
|
|
710
|
+
# Returns: Any (format-specific)
|
|
711
|
+
```
|
|
712
|
+
|
|
713
|
+
**`ls(uri, **options)`**
|
|
714
|
+
```python
|
|
715
|
+
files = provider.ls("git://schemas/?ref=v2.1.0")
|
|
716
|
+
# Returns: list[str]
|
|
717
|
+
```
|
|
718
|
+
|
|
719
|
+
**`get_semantic_versions(file_path, pattern=None)`**
|
|
720
|
+
```python
|
|
721
|
+
versions = provider.get_semantic_versions("schemas/cv-parser.yaml")
|
|
722
|
+
versions = provider.get_semantic_versions("schemas/cv-parser.yaml", pattern="v2\\..*")
|
|
723
|
+
# Returns: list[dict]
|
|
724
|
+
```
|
|
725
|
+
|
|
726
|
+
**`diff_versions(file_path, version1, version2, unified=3)`**
|
|
727
|
+
```python
|
|
728
|
+
diff = provider.diff_versions("schemas/cv-parser.yaml", "v2.0.0", "v2.1.0")
|
|
729
|
+
# Returns: str
|
|
730
|
+
```
|
|
731
|
+
|
|
732
|
+
**`clear_cache(ref=None)`**
|
|
733
|
+
```python
|
|
734
|
+
provider.clear_cache("v2.1.0") # Clear specific version
|
|
735
|
+
provider.clear_cache() # Clear all versions
|
|
736
|
+
# Returns: None
|
|
737
|
+
```
|
|
738
|
+
|
|
739
|
+
**`get_current_commit(ref=None)`**
|
|
740
|
+
```python
|
|
741
|
+
commit = provider.get_current_commit("v2.1.0")
|
|
742
|
+
# Returns: str (40-character hash)
|
|
743
|
+
```
|
|
744
|
+
|
|
745
|
+
---
|
|
746
|
+
|
|
747
|
+
## Security Best Practices
|
|
748
|
+
|
|
749
|
+
### 1. Use Read-Only Deploy Keys
|
|
750
|
+
|
|
751
|
+
```bash
|
|
752
|
+
# GitHub: Settings → Deploy keys
|
|
753
|
+
# ✓ Read access
|
|
754
|
+
# ✗ Write access
|
|
755
|
+
|
|
756
|
+
# GitLab: Settings → Repository → Deploy keys
|
|
757
|
+
# ✓ Read repository
|
|
758
|
+
# ✗ Write repository
|
|
759
|
+
```
|
|
760
|
+
|
|
761
|
+
### 2. Store Secrets in Kubernetes Secrets
|
|
762
|
+
|
|
763
|
+
```bash
|
|
764
|
+
# ✅ GOOD: Kubernetes Secret
|
|
765
|
+
kubectl create secret generic git-creds \
|
|
766
|
+
--from-file=ssh=$HOME/.ssh/deploy_key
|
|
767
|
+
|
|
768
|
+
# ❌ BAD: Environment variable
|
|
769
|
+
export GIT__SSH_KEY="-----BEGIN OPENSSH PRIVATE KEY-----\n..."
|
|
770
|
+
```
|
|
771
|
+
|
|
772
|
+
### 3. Enable Known Hosts Verification
|
|
773
|
+
|
|
774
|
+
```bash
|
|
775
|
+
# Generate known_hosts
|
|
776
|
+
ssh-keyscan github.com >> ~/.ssh/known_hosts
|
|
777
|
+
|
|
778
|
+
# Configure REM
|
|
779
|
+
export GIT__KNOWN_HOSTS_PATH="$HOME/.ssh/known_hosts"
|
|
780
|
+
|
|
781
|
+
# This prevents MITM attacks
|
|
782
|
+
```
|
|
783
|
+
|
|
784
|
+
### 4. Rotate PATs Regularly
|
|
785
|
+
|
|
786
|
+
```bash
|
|
787
|
+
# GitHub: Set expiration to 90 days
|
|
788
|
+
# GitLab: Set expiration to 90 days
|
|
789
|
+
|
|
790
|
+
# Rotate before expiration
|
|
791
|
+
# Update Kubernetes Secret
|
|
792
|
+
kubectl create secret generic git-creds \
|
|
793
|
+
--from-literal=token=ghp_NEW_TOKEN \
|
|
794
|
+
--dry-run=client -o yaml | kubectl apply -f -
|
|
795
|
+
```
|
|
796
|
+
|
|
797
|
+
### 5. Use Least Privilege
|
|
798
|
+
|
|
799
|
+
```bash
|
|
800
|
+
# GitHub Fine-grained PAT:
|
|
801
|
+
# Permissions → Contents → Read-only ✓
|
|
802
|
+
# Permissions → Contents → Read and write ✗
|
|
803
|
+
|
|
804
|
+
# SSH Deploy Key:
|
|
805
|
+
# Read access ✓
|
|
806
|
+
# Write access ✗
|
|
807
|
+
```
|
|
808
|
+
|
|
809
|
+
### 6. Audit Access
|
|
810
|
+
|
|
811
|
+
```bash
|
|
812
|
+
# Monitor Git access logs
|
|
813
|
+
kubectl logs -l app=rem-api | grep "Git"
|
|
814
|
+
|
|
815
|
+
# GitHub/GitLab audit logs
|
|
816
|
+
# Settings → Security → Audit log
|
|
817
|
+
```
|
|
818
|
+
|
|
819
|
+
---
|
|
820
|
+
|
|
821
|
+
## Performance & Caching
|
|
822
|
+
|
|
823
|
+
### Cache Hit Rates
|
|
824
|
+
|
|
825
|
+
```
|
|
826
|
+
Typical Performance:
|
|
827
|
+
- First clone: 1-10 seconds (depends on repo size)
|
|
828
|
+
- Cached read: <10ms (local filesystem)
|
|
829
|
+
- Shallow clone: 90% size reduction
|
|
830
|
+
|
|
831
|
+
Cache Efficiency:
|
|
832
|
+
- Same version, multiple reads: 100% cache hit
|
|
833
|
+
- Different versions: Separate cache entries
|
|
834
|
+
- Branch updates: Manual sync required
|
|
835
|
+
```
|
|
836
|
+
|
|
837
|
+
### Shallow Clones
|
|
838
|
+
|
|
839
|
+
```bash
|
|
840
|
+
# Enable shallow clones (default)
|
|
841
|
+
export GIT__SHALLOW_CLONE=true
|
|
842
|
+
|
|
843
|
+
# Benefits:
|
|
844
|
+
# - Faster clone (only latest commit)
|
|
845
|
+
# - Less disk space (no history)
|
|
846
|
+
# - Recommended for production
|
|
847
|
+
|
|
848
|
+
# Disable for full history
|
|
849
|
+
export GIT__SHALLOW_CLONE=false
|
|
850
|
+
```
|
|
851
|
+
|
|
852
|
+
### Cache Management
|
|
853
|
+
|
|
854
|
+
```python
|
|
855
|
+
from rem.services.git_service import GitService
|
|
856
|
+
|
|
857
|
+
git_svc = GitService()
|
|
858
|
+
|
|
859
|
+
# Manual sync (clear cache, pull latest)
|
|
860
|
+
git_svc.sync()
|
|
861
|
+
|
|
862
|
+
# Periodic sync (configure interval)
|
|
863
|
+
# (shell, not Python) export GIT__SYNC_INTERVAL=300  # 5 minutes
|
|
864
|
+
```
|
|
865
|
+
|
|
866
|
+
### Monitoring
|
|
867
|
+
|
|
868
|
+
```python
|
|
869
|
+
import os
|
|
870
|
+
from pathlib import Path
|
|
871
|
+
|
|
872
|
+
cache_dir = Path(os.environ.get("GIT__CACHE_DIR", "/tmp/rem-git-cache"))
|
|
873
|
+
|
|
874
|
+
# Check cache size
|
|
875
|
+
def get_cache_size():
|
|
876
|
+
total = sum(
|
|
877
|
+
f.stat().st_size
|
|
878
|
+
for f in cache_dir.rglob("*")
|
|
879
|
+
if f.is_file()
|
|
880
|
+
)
|
|
881
|
+
return total / (1024 ** 2) # MB
|
|
882
|
+
|
|
883
|
+
print(f"Git cache size: {get_cache_size():.2f} MB")
|
|
884
|
+
|
|
885
|
+
# List cached repos
|
|
886
|
+
for repo_dir in cache_dir.iterdir():
|
|
887
|
+
if repo_dir.is_dir():
|
|
888
|
+
print(f"Repo: {repo_dir.name}")
|
|
889
|
+
for ref_dir in repo_dir.iterdir():
|
|
890
|
+
if ref_dir.is_dir():
|
|
891
|
+
print(f" - {ref_dir.name}")
|
|
892
|
+
```
|
|
893
|
+
|
|
894
|
+
---
|
|
895
|
+
|
|
896
|
+
## Troubleshooting
|
|
897
|
+
|
|
898
|
+
### SSH Key Not Found
|
|
899
|
+
|
|
900
|
+
**Error**:
|
|
901
|
+
```
|
|
902
|
+
FileNotFoundError: SSH key not found at /etc/git-secret/ssh
|
|
903
|
+
```
|
|
904
|
+
|
|
905
|
+
**Solution**:
|
|
906
|
+
```bash
|
|
907
|
+
# Check if secret is mounted
|
|
908
|
+
kubectl describe pod rem-api-xxx | grep git-secret
|
|
909
|
+
|
|
910
|
+
# Verify secret exists
|
|
911
|
+
kubectl get secret git-creds -n rem-app
|
|
912
|
+
|
|
913
|
+
# Check file permissions
|
|
914
|
+
kubectl exec rem-api-xxx -- ls -la /etc/git-secret/
|
|
915
|
+
|
|
916
|
+
# Expected:
|
|
917
|
+
# -r-------- 1 rem rem 464 Jan 15 10:30 ssh
|
|
918
|
+
# -r-------- 1 rem rem 444 Jan 15 10:30 known_hosts
|
|
919
|
+
```
|
|
920
|
+
|
|
921
|
+
### Authentication Failed
|
|
922
|
+
|
|
923
|
+
**Error**:
|
|
924
|
+
```
|
|
925
|
+
GitCommandError: Permission denied (publickey)
|
|
926
|
+
```
|
|
927
|
+
|
|
928
|
+
**Solution**:
|
|
929
|
+
```bash
|
|
930
|
+
# Test SSH key locally
|
|
931
|
+
ssh -i /path/to/key git@github.com
|
|
932
|
+
|
|
933
|
+
# Check deploy key in GitHub
|
|
934
|
+
# Settings → Deploy keys → Verify key is added
|
|
935
|
+
|
|
936
|
+
# Verify known_hosts contains host
|
|
937
|
+
grep github.com ~/.ssh/known_hosts
|
|
938
|
+
|
|
939
|
+
# Regenerate known_hosts if needed
|
|
940
|
+
ssh-keygen -R github.com; ssh-keyscan github.com >> ~/.ssh/known_hosts
|
|
941
|
+
```
|
|
942
|
+
|
|
943
|
+
### Rate Limit Exceeded (HTTPS)
|
|
944
|
+
|
|
945
|
+
**Error**:
|
|
946
|
+
```
|
|
947
|
+
API rate limit exceeded for user
|
|
948
|
+
```
|
|
949
|
+
|
|
950
|
+
**Solution**:
|
|
951
|
+
```bash
|
|
952
|
+
# Switch to SSH authentication
|
|
953
|
+
export GIT__DEFAULT_REPO_URL="ssh://git@github.com/org/repo.git"
|
|
954
|
+
|
|
955
|
+
# Or authenticate over HTTPS with a personal access token (authenticated requests get higher limits)
|
|
956
|
+
export GIT__PERSONAL_ACCESS_TOKEN="ghp_..."
|
|
957
|
+
```
|
|
958
|
+
|
|
959
|
+
### Repo Clone Timeout
|
|
960
|
+
|
|
961
|
+
**Error**:
|
|
962
|
+
```
|
|
963
|
+
GitCommandError: timeout after 60s
|
|
964
|
+
```
|
|
965
|
+
|
|
966
|
+
**Solution**:
|
|
967
|
+
```bash
|
|
968
|
+
# Enable shallow clone
|
|
969
|
+
export GIT__SHALLOW_CLONE=true
|
|
970
|
+
|
|
971
|
+
# Or raise Git's HTTP post buffer (a buffer size in bytes, not a timeout; may help large HTTPS transfers)
|
|
972
|
+
git config --global http.postBuffer 524288000
|
|
973
|
+
|
|
974
|
+
# Check network connectivity
|
|
975
|
+
kubectl exec rem-api-xxx -- ping github.com
|
|
976
|
+
```
|
|
977
|
+
|
|
978
|
+
### Cache Corruption
|
|
979
|
+
|
|
980
|
+
**Error**:
|
|
981
|
+
```
|
|
982
|
+
InvalidGitRepositoryError: /tmp/rem-git-cache/xxx is not a git repository
|
|
983
|
+
```
|
|
984
|
+
|
|
985
|
+
**Solution**:
|
|
986
|
+
```python
|
|
987
|
+
from rem.services.git_service import GitService
|
|
988
|
+
|
|
989
|
+
git_svc = GitService()
|
|
990
|
+
|
|
991
|
+
# Clear corrupted cache
|
|
992
|
+
git_svc.sync()
|
|
993
|
+
|
|
994
|
+
# Or manually delete cache
|
|
995
|
+
# (shell, not Python) rm -rf /tmp/rem-git-cache/*
|
|
996
|
+
```
|
|
997
|
+
|
|
998
|
+
### File Not Found at Version
|
|
999
|
+
|
|
1000
|
+
**Error**:
|
|
1001
|
+
```
|
|
1002
|
+
FileNotFoundError: Path 'schemas/agent.yaml' not found at ref 'v2.1.0'
|
|
1003
|
+
```
|
|
1004
|
+
|
|
1005
|
+
**Solution**:
|
|
1006
|
+
```python
|
|
1007
|
+
from rem.services.git_service import GitService
|
|
1008
|
+
|
|
1009
|
+
git_svc = GitService()
|
|
1010
|
+
|
|
1011
|
+
# List available versions
|
|
1012
|
+
versions = git_svc.list_schema_versions("agent")
|
|
1013
|
+
|
|
1014
|
+
# Check if file exists at tag
|
|
1015
|
+
# File may have been renamed or moved
|
|
1016
|
+
```
|
|
1017
|
+
|
|
1018
|
+
### Known Hosts Verification Failed
|
|
1019
|
+
|
|
1020
|
+
**Error**:
|
|
1021
|
+
```
|
|
1022
|
+
Host key verification failed
|
|
1023
|
+
```
|
|
1024
|
+
|
|
1025
|
+
**Solution**:
|
|
1026
|
+
```bash
|
|
1027
|
+
# Add host to known_hosts
|
|
1028
|
+
ssh-keyscan github.com >> ~/.ssh/known_hosts
|
|
1029
|
+
|
|
1030
|
+
# Update Kubernetes Secret
|
|
1031
|
+
kubectl create secret generic git-creds \
|
|
1032
|
+
--from-file=ssh=$HOME/.ssh/deploy_key \
|
|
1033
|
+
--from-file=known_hosts=$HOME/.ssh/known_hosts \
|
|
1034
|
+
--dry-run=client -o yaml | kubectl apply -f -
|
|
1035
|
+
|
|
1036
|
+
# Restart pods
|
|
1037
|
+
kubectl rollout restart deployment/rem-api -n rem-app
|
|
1038
|
+
```
|
|
1039
|
+
|
|
1040
|
+
---
|
|
1041
|
+
|
|
1042
|
+
## Environment Variables Reference
|
|
1043
|
+
|
|
1044
|
+
| Variable | Default | Description |
|
|
1045
|
+
|----------|---------|-------------|
|
|
1046
|
+
| `GIT__ENABLED` | `false` | Enable Git provider |
|
|
1047
|
+
| `GIT__DEFAULT_REPO_URL` | `None` | Git repository URL (ssh:// or https://) |
|
|
1048
|
+
| `GIT__DEFAULT_BRANCH` | `main` | Default branch to clone |
|
|
1049
|
+
| `GIT__SSH_KEY_PATH` | `/etc/git-secret/ssh` | Path to SSH private key |
|
|
1050
|
+
| `GIT__KNOWN_HOSTS_PATH` | `/etc/git-secret/known_hosts` | Path to known_hosts file |
|
|
1051
|
+
| `GIT__PERSONAL_ACCESS_TOKEN` | `None` | PAT for HTTPS authentication |
|
|
1052
|
+
| `GIT__CACHE_DIR` | `/tmp/rem-git-cache` | Local cache directory |
|
|
1053
|
+
| `GIT__SHALLOW_CLONE` | `true` | Use shallow clone (--depth=1) |
|
|
1054
|
+
| `GIT__VERIFY_SSL` | `true` | Verify SSL certificates |
|
|
1055
|
+
| `GIT__SYNC_INTERVAL` | `300` | Sync interval in seconds |
|
|
1056
|
+
|
|
1057
|
+
---
|
|
1058
|
+
|
|
1059
|
+
## Additional Resources
|
|
1060
|
+
|
|
1061
|
+
- [GitPython Documentation](https://gitpython.readthedocs.io/)
|
|
1062
|
+
- [Semantic Versioning Spec](https://semver.org/)
|
|
1063
|
+
- [Kubernetes Secrets](https://kubernetes.io/docs/concepts/configuration/secret/)
|
|
1064
|
+
- [GitHub Deploy Keys](https://docs.github.com/en/developers/overview/managing-deploy-keys)
|
|
1065
|
+
- [GitLab Deploy Keys](https://docs.gitlab.com/ee/user/project/deploy_keys/)
|
|
1066
|
+
- [git-sync Sidecar](https://github.com/kubernetes/git-sync)
|
|
1067
|
+
|
|
1068
|
+
---
|
|
1069
|
+
|
|
1070
|
+
## Support
|
|
1071
|
+
|
|
1072
|
+
For issues or questions:
|
|
1073
|
+
1. Check the [Troubleshooting](#troubleshooting) section
|
|
1074
|
+
2. Review logs: `kubectl logs -l app=rem-api | grep Git`
|
|
1075
|
+
3. Open issue: https://github.com/your-org/rem/issues
|