remdb-0.3.141-py3-none-any.whl → remdb-0.3.163-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic.

Files changed (44)
  1. rem/agentic/agents/__init__.py +16 -0
  2. rem/agentic/agents/agent_manager.py +310 -0
  3. rem/agentic/context.py +81 -3
  4. rem/agentic/context_builder.py +18 -3
  5. rem/api/deps.py +3 -5
  6. rem/api/main.py +22 -3
  7. rem/api/mcp_router/server.py +2 -0
  8. rem/api/mcp_router/tools.py +90 -0
  9. rem/api/middleware/tracking.py +5 -5
  10. rem/api/routers/auth.py +346 -5
  11. rem/api/routers/chat/completions.py +4 -2
  12. rem/api/routers/chat/streaming.py +77 -22
  13. rem/api/routers/messages.py +24 -15
  14. rem/auth/__init__.py +13 -3
  15. rem/auth/jwt.py +352 -0
  16. rem/auth/middleware.py +108 -6
  17. rem/auth/providers/__init__.py +4 -1
  18. rem/auth/providers/email.py +215 -0
  19. rem/cli/commands/experiments.py +32 -46
  20. rem/models/core/experiment.py +4 -14
  21. rem/models/entities/__init__.py +4 -0
  22. rem/models/entities/subscriber.py +175 -0
  23. rem/models/entities/user.py +1 -0
  24. rem/schemas/agents/core/agent-builder.yaml +134 -0
  25. rem/services/__init__.py +3 -1
  26. rem/services/content/service.py +4 -3
  27. rem/services/email/__init__.py +10 -0
  28. rem/services/email/service.py +511 -0
  29. rem/services/email/templates.py +360 -0
  30. rem/services/postgres/README.md +38 -0
  31. rem/services/postgres/diff_service.py +19 -3
  32. rem/services/postgres/pydantic_to_sqlalchemy.py +45 -13
  33. rem/services/postgres/repository.py +5 -4
  34. rem/services/session/compression.py +113 -50
  35. rem/services/session/reload.py +14 -7
  36. rem/services/user_service.py +29 -0
  37. rem/settings.py +199 -4
  38. rem/sql/migrations/005_schema_update.sql +145 -0
  39. rem/utils/README.md +45 -0
  40. rem/utils/files.py +157 -1
  41. {remdb-0.3.141.dist-info → remdb-0.3.163.dist-info}/METADATA +7 -5
  42. {remdb-0.3.141.dist-info → remdb-0.3.163.dist-info}/RECORD +44 -35
  43. {remdb-0.3.141.dist-info → remdb-0.3.163.dist-info}/WHEEL +0 -0
  44. {remdb-0.3.141.dist-info → remdb-0.3.163.dist-info}/entry_points.txt +0 -0
rem/auth/middleware.py CHANGED
@@ -1,13 +1,16 @@
1
1
  """
2
- OAuth Authentication Middleware for FastAPI.
2
+ Authentication Middleware for FastAPI.
3
3
 
4
- Protects API endpoints by requiring valid session.
5
- Supports anonymous access with rate limiting when allow_anonymous=True.
4
+ Protects API endpoints by requiring valid authentication.
5
+ Supports multiple auth methods: JWT, API Key, Session, Dev Token.
6
+ Anonymous access with rate limiting when allow_anonymous=True.
6
7
  MCP endpoints are always protected unless explicitly disabled.
7
8
 
8
9
  Design Pattern:
10
+ - Check X-API-Key header first (if API key auth enabled)
11
+ - Check JWT token in Authorization header (Bearer token)
12
+ - Check dev token (non-production only, starts with "dev_")
9
13
  - Check session for user on protected paths
10
- - Check Bearer token for dev token (non-production only)
11
14
  - MCP paths always require authentication (protected service)
12
15
  - If allow_anonymous=True: Allow unauthenticated requests (marked as ANONYMOUS tier)
13
16
  - If allow_anonymous=False: Return 401 for API calls, redirect browsers to login
@@ -20,6 +23,12 @@ Access Modes (configured in settings.auth):
20
23
  - mcp_requires_auth=true (default): MCP always requires login regardless of allow_anonymous
21
24
  - mcp_requires_auth=false: MCP follows normal allow_anonymous rules (dev only)
22
25
 
26
+ API Key Authentication (configured in settings.api):
27
+ - api_key_enabled=true: Require X-API-Key header for protected endpoints
28
+ - api_key: The secret key to validate against
29
+ - Provides simple programmatic access without OAuth flow
30
+ - X-API-Key header takes precedence over session auth
31
+
23
32
  Dev Token Support (non-production only):
24
33
  - GET /api/auth/dev/token returns a Bearer token for test-user
25
34
  - Include as: Authorization: Bearer dev_<signature>
@@ -82,6 +91,67 @@ class AuthMiddleware(BaseHTTPMiddleware):
82
91
  self.mcp_requires_auth = mcp_requires_auth
83
92
  self.mcp_path = mcp_path
84
93
 
94
+ def _check_api_key(self, request: Request) -> dict | None:
95
+ """
96
+ Check for valid X-API-Key header.
97
+
98
+ Returns:
99
+ API key user dict if valid, None otherwise
100
+ """
101
+ # Only check if API key auth is enabled
102
+ if not settings.api.api_key_enabled:
103
+ return None
104
+
105
+ # Check for X-API-Key header
106
+ api_key = request.headers.get("x-api-key")
107
+ if not api_key:
108
+ return None
109
+
110
+ # Validate against configured API key
111
+ if settings.api.api_key and api_key == settings.api.api_key:
112
+ logger.debug("X-API-Key authenticated")
113
+ return {
114
+ "id": "api-key-user",
115
+ "email": "api@rem.local",
116
+ "name": "API Key User",
117
+ "provider": "api-key",
118
+ "tenant_id": "default",
119
+ "tier": "pro", # API key users get full access
120
+ "roles": ["user"],
121
+ }
122
+
123
+ # Invalid API key
124
+ logger.warning("Invalid X-API-Key provided")
125
+ return None
126
+
127
+ def _check_jwt_token(self, request: Request) -> dict | None:
128
+ """
129
+ Check for valid JWT in Authorization header.
130
+
131
+ Returns:
132
+ User dict if valid JWT, None otherwise
133
+ """
134
+ auth_header = request.headers.get("authorization", "")
135
+ if not auth_header.startswith("Bearer "):
136
+ return None
137
+
138
+ token = auth_header[7:] # Strip "Bearer "
139
+
140
+ # Skip dev tokens (handled separately)
141
+ if token.startswith("dev_"):
142
+ return None
143
+
144
+ # Verify JWT token
145
+ from .jwt import get_jwt_service
146
+ jwt_service = get_jwt_service()
147
+ user = jwt_service.verify_token(token)
148
+
149
+ if user:
150
+ logger.debug(f"JWT authenticated: {user.get('email')}")
151
+ return user
152
+
153
+ return None
154
+
85
155
  def _check_dev_token(self, request: Request) -> dict | None:
86
156
  """
87
157
  Check for valid dev token in Authorization header (non-production only).
@@ -105,7 +175,7 @@ class AuthMiddleware(BaseHTTPMiddleware):
105
175
  # Verify dev token
106
176
  from ..api.routers.dev import verify_dev_token
107
177
  if verify_dev_token(token):
108
- logger.debug(f"Dev token authenticated as test-user")
178
+ logger.debug("Dev token authenticated as test-user")
109
179
  return {
110
180
  "id": "test-user",
111
181
  "email": "test@rem.local",
@@ -142,6 +212,38 @@ class AuthMiddleware(BaseHTTPMiddleware):
142
212
  if not is_protected or is_excluded:
143
213
  return await call_next(request)
144
214
 
215
+ # Check for X-API-Key header first (if enabled)
216
+ api_key_user = self._check_api_key(request)
217
+ if api_key_user:
218
+ request.state.user = api_key_user
219
+ request.state.is_anonymous = False
220
+ return await call_next(request)
221
+
222
+ # If API key auth is enabled but no valid key provided, reject immediately
223
+ if settings.api.api_key_enabled:
224
+ # Check if X-API-Key header was provided but invalid
225
+ if request.headers.get("x-api-key"):
226
+ logger.warning(f"Invalid X-API-Key for: {path}")
227
+ return JSONResponse(
228
+ status_code=401,
229
+ content={"detail": "Invalid API key"},
230
+ headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
231
+ )
232
+ # No API key provided when required
233
+ logger.debug(f"Missing X-API-Key for: {path}")
234
+ return JSONResponse(
235
+ status_code=401,
236
+ content={"detail": "API key required. Include X-API-Key header."},
237
+ headers={"WWW-Authenticate": 'ApiKey realm="REM API"'},
238
+ )
239
+
240
+ # Check for JWT token in Authorization header
241
+ jwt_user = self._check_jwt_token(request)
242
+ if jwt_user:
243
+ request.state.user = jwt_user
244
+ request.state.is_anonymous = False
245
+ return await call_next(request)
246
+
145
247
  # Check for dev token (non-production only)
146
248
  dev_user = self._check_dev_token(request)
147
249
  if dev_user:
@@ -149,7 +251,7 @@ class AuthMiddleware(BaseHTTPMiddleware):
149
251
  request.state.is_anonymous = False
150
252
  return await call_next(request)
151
253
 
152
- # Check for valid session
254
+ # Check for valid session (backward compatibility)
153
255
  user = request.session.get("user")
154
256
 
155
257
  if user:
rem/auth/providers/__init__.py CHANGED
@@ -1,6 +1,7 @@
1
- """OAuth provider implementations."""
1
+ """Authentication provider implementations."""
2
2
 
3
3
  from .base import OAuthProvider, OAuthTokens, OAuthUserInfo
4
+ from .email import EmailAuthProvider, EmailAuthResult
4
5
  from .google import GoogleOAuthProvider
5
6
  from .microsoft import MicrosoftOAuthProvider
6
7
 
@@ -8,6 +9,8 @@ __all__ = [
8
9
  "OAuthProvider",
9
10
  "OAuthTokens",
10
11
  "OAuthUserInfo",
12
+ "EmailAuthProvider",
13
+ "EmailAuthResult",
11
14
  "GoogleOAuthProvider",
12
15
  "MicrosoftOAuthProvider",
13
16
  ]
rem/auth/providers/email.py ADDED
@@ -0,0 +1,215 @@
1
+ """
2
+ Email Authentication Provider.
3
+
4
+ Passwordless authentication using email verification codes.
5
+ Unlike OAuth providers, this handles the full flow internally.
6
+
7
+ Flow:
8
+ 1. User requests login with email address
9
+ 2. System generates code, upserts user, sends email
10
+ 3. User enters code
11
+ 4. System verifies code and creates session
12
+
13
+ Design:
14
+ - Uses EmailService for sending codes
15
+ - Creates users with deterministic UUID from email hash
16
+ - Stores challenge in user metadata
17
+ - No external OAuth dependencies
18
+ """
19
+
20
+ from typing import TYPE_CHECKING
21
+ from pydantic import BaseModel, Field
22
+ from loguru import logger
23
+
24
+ from ...services.email import EmailService
25
+
26
+ if TYPE_CHECKING:
27
+ from ...services.postgres import PostgresService
28
+
29
+
30
+ class EmailAuthResult(BaseModel):
31
+ """Result of email authentication operations."""
32
+
33
+ success: bool = Field(description="Whether operation succeeded")
34
+ email: str = Field(description="Email address")
35
+ user_id: str | None = Field(default=None, description="User ID if authenticated")
36
+ error: str | None = Field(default=None, description="Error message if failed")
37
+ message: str | None = Field(default=None, description="User-friendly message")
38
+
39
+
40
+ class EmailAuthProvider:
41
+ """
42
+ Email-based passwordless authentication provider.
43
+
44
+ Handles the complete email login flow:
45
+ 1. send_code() - Generate and send verification code
46
+ 2. verify_code() - Verify code and return user info
47
+ """
48
+
49
+ def __init__(
50
+ self,
51
+ email_service: EmailService | None = None,
52
+ template_kwargs: dict | None = None,
53
+ ):
54
+ """
55
+ Initialize EmailAuthProvider.
56
+
57
+ Args:
58
+ email_service: EmailService instance (creates new one if not provided)
59
+ template_kwargs: Customization for email templates (colors, branding, etc.)
60
+ """
61
+ self._email_service = email_service or EmailService()
62
+ self._template_kwargs = template_kwargs or {}
63
+
64
+ @property
65
+ def is_configured(self) -> bool:
66
+ """Check if email auth is properly configured."""
67
+ return self._email_service.is_configured
68
+
69
+ async def send_code(
70
+ self,
71
+ email: str,
72
+ db: "PostgresService",
73
+ tenant_id: str = "default",
74
+ ) -> EmailAuthResult:
75
+ """
76
+ Send a verification code to an email address.
77
+
78
+ Creates user if not exists (using deterministic UUID from email).
79
+ Stores code in user metadata.
80
+
81
+ Args:
82
+ email: Email address to send code to
83
+ db: PostgresService instance
84
+ tenant_id: Tenant identifier
85
+
86
+ Returns:
87
+ EmailAuthResult with success status
88
+ """
89
+ if not self.is_configured:
90
+ return EmailAuthResult(
91
+ success=False,
92
+ email=email,
93
+ error="Email service not configured",
94
+ message="Email login is not available. Please try another method.",
95
+ )
96
+
97
+ try:
98
+ result = await self._email_service.send_login_code(
99
+ email=email,
100
+ db=db,
101
+ tenant_id=tenant_id,
102
+ template_kwargs=self._template_kwargs,
103
+ )
104
+
105
+ if result["success"]:
106
+ return EmailAuthResult(
107
+ success=True,
108
+ email=email,
109
+ user_id=result["user_id"],
110
+ message=f"Verification code sent to {email}. Check your inbox.",
111
+ )
112
+ else:
113
+ return EmailAuthResult(
114
+ success=False,
115
+ email=email,
116
+ error=result.get("error", "Failed to send code"),
117
+ message="Failed to send verification code. Please try again.",
118
+ )
119
+
120
+ except Exception as e:
121
+ logger.error(f"Error sending login code: {e}")
122
+ return EmailAuthResult(
123
+ success=False,
124
+ email=email,
125
+ error=str(e),
126
+ message="An error occurred. Please try again.",
127
+ )
128
+
129
+ async def verify_code(
130
+ self,
131
+ email: str,
132
+ code: str,
133
+ db: "PostgresService",
134
+ tenant_id: str = "default",
135
+ ) -> EmailAuthResult:
136
+ """
137
+ Verify a login code and authenticate user.
138
+
139
+ Args:
140
+ email: Email address
141
+ code: 6-digit verification code
142
+ db: PostgresService instance
143
+ tenant_id: Tenant identifier
144
+
145
+ Returns:
146
+ EmailAuthResult with user_id if successful
147
+ """
148
+ try:
149
+ result = await self._email_service.verify_login_code(
150
+ email=email,
151
+ code=code,
152
+ db=db,
153
+ tenant_id=tenant_id,
154
+ )
155
+
156
+ if result["valid"]:
157
+ return EmailAuthResult(
158
+ success=True,
159
+ email=email,
160
+ user_id=result["user_id"],
161
+ message="Successfully authenticated!",
162
+ )
163
+ else:
164
+ error = result.get("error", "Invalid code")
165
+ # User-friendly error messages
166
+ if error == "Login code expired":
167
+ message = "Your code has expired. Please request a new one."
168
+ elif error == "Invalid login code":
169
+ message = "Invalid code. Please check and try again."
170
+ elif error == "No login code requested":
171
+ message = "No code was requested for this email. Please request a new code."
172
+ elif error == "User not found":
173
+ message = "Email not found. Please request a login code first."
174
+ else:
175
+ message = "Verification failed. Please try again."
176
+
177
+ return EmailAuthResult(
178
+ success=False,
179
+ email=email,
180
+ error=error,
181
+ message=message,
182
+ )
183
+
184
+ except Exception as e:
185
+ logger.error(f"Error verifying login code: {e}")
186
+ return EmailAuthResult(
187
+ success=False,
188
+ email=email,
189
+ error=str(e),
190
+ message="An error occurred. Please try again.",
191
+ )
192
+
193
+ def get_user_dict(self, email: str, user_id: str) -> dict:
194
+ """
195
+ Create a user dict for session storage.
196
+
197
+ Compatible with OAuth user format for consistent session handling.
198
+
199
+ Args:
200
+ email: User's email
201
+ user_id: User's UUID
202
+
203
+ Returns:
204
+ User dict for session
205
+ """
206
+ return {
207
+ "id": user_id,
208
+ "email": email,
209
+ "email_verified": True, # Email is verified through code
210
+ "name": email.split("@")[0], # Use email prefix as name
211
+ "provider": "email",
212
+ "tenant_id": "default",
213
+ "tier": "free", # Email users start at free tier
214
+ "roles": ["user"],
215
+ }
rem/cli/commands/experiments.py CHANGED
@@ -125,19 +125,17 @@ def create(
125
125
  # Resolve base path: CLI arg > EXPERIMENTS_HOME env var > default "experiments"
126
126
  if base_path is None:
127
127
  base_path = os.getenv("EXPERIMENTS_HOME", "experiments")
128
- # Build dataset reference
128
+ # Build dataset reference (format auto-detected from file extension)
129
129
  if dataset_location == "git":
130
130
  dataset_ref = DatasetReference(
131
131
  location=DatasetLocation.GIT,
132
132
  path="ground-truth/dataset.csv",
133
- format="csv",
134
133
  description="Ground truth Q&A dataset for evaluation"
135
134
  )
136
135
  else: # s3 or hybrid
137
136
  dataset_ref = DatasetReference(
138
137
  location=DatasetLocation(dataset_location),
139
138
  path=f"s3://rem-experiments/{name}/datasets/ground_truth.parquet",
140
- format="parquet",
141
139
  schema_path="datasets/schema.yaml" if dataset_location == "hybrid" else None,
142
140
  description="Ground truth dataset for evaluation"
143
141
  )
@@ -930,58 +928,46 @@ def run(
930
928
  raise click.Abort()
931
929
  click.echo("✓ Evaluator credentials validated")
932
930
 
933
- # Load dataset using Polars
934
- import polars as pl
931
+ # Load dataset using read_dataframe utility (auto-detects format from extension)
932
+ from rem.utils.files import read_dataframe
935
933
 
936
934
  click.echo(f"Loading dataset: {list(config.datasets.keys())[0]}")
937
935
  dataset_ref = list(config.datasets.values())[0]
938
936
 
939
- if dataset_ref.location.value == "git":
940
- # Load from Git (local filesystem)
941
- dataset_path = Path(base_path) / name / dataset_ref.path
942
- if not dataset_path.exists():
943
- click.echo(f"Error: Dataset not found: {dataset_path}")
944
- raise click.Abort()
945
-
946
- if dataset_ref.format == "csv":
947
- dataset_df = pl.read_csv(dataset_path)
948
- elif dataset_ref.format == "parquet":
949
- dataset_df = pl.read_parquet(dataset_path)
950
- elif dataset_ref.format == "jsonl":
951
- dataset_df = pl.read_ndjson(dataset_path)
952
- else:
953
- click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
954
- raise click.Abort()
955
- elif dataset_ref.location.value in ["s3", "hybrid"]:
956
- # Load from S3 using FS provider
957
- from rem.services.fs import FS
958
- from io import BytesIO
937
+ try:
938
+ if dataset_ref.location.value == "git":
939
+ # Load from Git (local filesystem)
940
+ dataset_path = Path(base_path) / name / dataset_ref.path
941
+ if not dataset_path.exists():
942
+ click.echo(f"Error: Dataset not found: {dataset_path}")
943
+ raise click.Abort()
959
944
 
960
- fs = FS()
945
+ dataset_df = read_dataframe(dataset_path)
961
946
 
962
- try:
963
- if dataset_ref.format == "csv":
964
- content = fs.read(dataset_ref.path)
965
- dataset_df = pl.read_csv(BytesIO(content.encode() if isinstance(content, str) else content))
966
- elif dataset_ref.format == "parquet":
967
- content_bytes = fs.read(dataset_ref.path)
968
- dataset_df = pl.read_parquet(BytesIO(content_bytes if isinstance(content_bytes, bytes) else content_bytes.encode()))
969
- elif dataset_ref.format == "jsonl":
970
- content = fs.read(dataset_ref.path)
971
- dataset_df = pl.read_ndjson(BytesIO(content.encode() if isinstance(content, str) else content))
972
- else:
973
- click.echo(f"Error: Format '{dataset_ref.format}' not yet supported")
974
- raise click.Abort()
947
+ elif dataset_ref.location.value in ["s3", "hybrid"]:
948
+ # Load from S3 using FS provider
949
+ from rem.services.fs import FS
975
950
 
951
+ fs = FS()
952
+ content = fs.read(dataset_ref.path)
953
+ # Ensure we have bytes
954
+ if isinstance(content, str):
955
+ content = content.encode()
956
+ dataset_df = read_dataframe(content, filename=dataset_ref.path)
976
957
  click.echo(f"✓ Loaded dataset from S3")
977
- except Exception as e:
978
- logger.error(f"Failed to load dataset from S3: {e}")
979
- click.echo(f"Error: Could not load dataset from S3")
980
- click.echo(f" Path: {dataset_ref.path}")
981
- click.echo(f" Format: {dataset_ref.format}")
958
+
959
+ else:
960
+ click.echo(f"Error: Unknown dataset location: {dataset_ref.location.value}")
982
961
  raise click.Abort()
983
- else:
984
- click.echo(f"Error: Unknown dataset location: {dataset_ref.location.value}")
962
+
963
+ except ValueError as e:
964
+ # Unsupported format error from read_dataframe
965
+ click.echo(f"Error: {e}")
966
+ raise click.Abort()
967
+ except Exception as e:
968
+ logger.error(f"Failed to load dataset: {e}")
969
+ click.echo(f"Error: Could not load dataset")
970
+ click.echo(f" Path: {dataset_ref.path}")
985
971
  raise click.Abort()
986
972
 
987
973
  click.echo(f"✓ Loaded dataset: {len(dataset_df)} examples")
rem/models/core/experiment.py CHANGED
@@ -138,18 +138,14 @@ class DatasetReference(BaseModel):
138
138
 
139
139
  path: str = Field(
140
140
  description=(
141
- "Path to dataset:\n"
141
+ "Path to dataset. Format is inferred from file extension.\n"
142
+ "Supported: .csv, .tsv, .parquet, .json, .jsonl, .xlsx, .ods, .avro, .ipc\n"
142
143
  "- Git: Relative path from experiment root (e.g., 'datasets/ground_truth.csv')\n"
143
- "- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/ground_truth.csv')\n"
144
+ "- S3: Full S3 URI (e.g., 's3://bucket/experiments/my-exp/datasets/data.parquet')\n"
144
145
  "- Hybrid: S3 URI for data, Git path for schema"
145
146
  )
146
147
  )
147
148
 
148
- format: Literal["csv", "jsonl", "parquet", "json"] = Field(
149
- default="csv",
150
- description="Dataset file format"
151
- )
152
-
153
149
  schema_path: str | None = Field(
154
150
  default=None,
155
151
  description=(
@@ -262,8 +258,7 @@ class ExperimentConfig(BaseModel):
262
258
  datasets:
263
259
  ground_truth:
264
260
  location: git
265
- path: datasets/ground_truth.csv
266
- format: csv
261
+ path: datasets/ground_truth.csv # format inferred from extension
267
262
  results:
268
263
  location: git
269
264
  base_path: results/
@@ -288,12 +283,10 @@ class ExperimentConfig(BaseModel):
288
283
  ground_truth:
289
284
  location: s3
290
285
  path: s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet
291
- format: parquet
292
286
  schema_path: datasets/schema.yaml # Schema in Git for documentation
293
287
  test_cases:
294
288
  location: s3
295
289
  path: s3://rem-prod/experiments/cv-parser-production/datasets/test_cases.jsonl
296
- format: jsonl
297
290
  results:
298
291
  location: hybrid
299
292
  base_path: s3://rem-prod/experiments/cv-parser-production/results/
@@ -558,7 +551,6 @@ class ExperimentConfig(BaseModel):
558
551
 
559
552
  - **Location**: `{dataset.location.value}`
560
553
  - **Path**: `{dataset.path}`
561
- - **Format**: `{dataset.format}`
562
554
  """
563
555
  if dataset.description:
564
556
  readme += f"- **Description**: {dataset.description}\n"
@@ -629,7 +621,6 @@ EXAMPLE_SMALL_EXPERIMENT = ExperimentConfig(
629
621
  "ground_truth": DatasetReference(
630
622
  location=DatasetLocation.GIT,
631
623
  path="datasets/ground_truth.csv",
632
- format="csv",
633
624
  description="10 manually curated test cases"
634
625
  )
635
626
  },
@@ -659,7 +650,6 @@ EXAMPLE_LARGE_EXPERIMENT = ExperimentConfig(
659
650
  "ground_truth": DatasetReference(
660
651
  location=DatasetLocation.S3,
661
652
  path="s3://rem-prod/experiments/cv-parser-production/datasets/ground_truth.parquet",
662
- format="parquet",
663
653
  schema_path="datasets/schema.yaml",
664
654
  description="10,000 CV/resume pairs with ground truth extractions"
665
655
  )
rem/models/entities/__init__.py CHANGED
@@ -39,6 +39,7 @@ from .shared_session import (
39
39
  SharedWithMeResponse,
40
40
  SharedWithMeSummary,
41
41
  )
42
+ from .subscriber import Subscriber, SubscriberOrigin, SubscriberStatus
42
43
  from .user import User, UserTier
43
44
 
44
45
  __all__ = [
@@ -56,6 +57,9 @@ __all__ = [
56
57
  "FeedbackCategory",
57
58
  "User",
58
59
  "UserTier",
60
+ "Subscriber",
61
+ "SubscriberStatus",
62
+ "SubscriberOrigin",
59
63
  "File",
60
64
  "Moment",
61
65
  "Schema",