remdb 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +565 -0
  44. rem/cli/commands/configure.py +423 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1124 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +88 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +657 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +229 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.2.6.dist-info/METADATA +1191 -0
  185. remdb-0.2.6.dist-info/RECORD +187 -0
  186. remdb-0.2.6.dist-info/WHEEL +4 -0
  187. remdb-0.2.6.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,305 @@
1
+ """
2
+ Example usage of embeddings utility for generating vector embeddings.
3
+
4
+ This demonstrates batch processing, error handling with tenacity automatic retries,
5
+ and integration patterns for the PostgresService.
6
+ """
7
+
8
+ from rem.utils.embeddings import (
9
+ EmbeddingError,
10
+ RateLimitError,
11
+ generate_embeddings,
12
+ get_embedding_dimension,
13
+ )
14
+
15
+
16
def example_single_embedding():
    """Embed one sentence and print a short report about the result.

    Calls ``generate_embeddings`` with a single string, looks up the
    dimension registered for the provider via ``get_embedding_dimension``,
    and prints both alongside the first few values. An ``EmbeddingError``
    is caught and reported instead of propagating.
    """
    banner = "=" * 80
    print(banner)
    print("SINGLE EMBEDDING EXAMPLE")
    print(banner)

    provider = "openai:text-embedding-3-small"
    sentence = "What is the meaning of life?"

    try:
        vector = generate_embeddings(provider, sentence)

        # Declared dimension for the provider, shown next to the real length
        # so a mismatch is visible at a glance.
        declared_size = get_embedding_dimension(provider)

        print(f"\nText: {sentence}")
        print(f"Provider: {provider}")
        print(f"Embedding dimension: {declared_size}")
        print(f"Actual length: {len(vector)}")
        print(f"First 5 values: {vector[:5]}")

    except EmbeddingError as err:
        print(f"Error: {err}")
40
+
41
+
42
def example_batch_embeddings():
    """Embed several texts with one ``generate_embeddings`` call and list them.

    The whole list is handed to ``generate_embeddings`` at once; each result
    is then printed next to (a prefix of) the text it belongs to.
    ``RateLimitError`` and ``EmbeddingError`` are caught and reported.
    """
    banner = "=" * 80
    print("\n" + banner)
    print("BATCH EMBEDDING EXAMPLE")
    print(banner)

    provider = "openai:text-embedding-3-small"
    prompts = [
        "What is the meaning of life?",
        "How do I bake a chocolate cake?",
        "Explain quantum physics in simple terms",
        "Write a haiku about programming",
        "What is the capital of France?",
    ]

    try:
        # One call for the whole list rather than one call per text.
        vectors = generate_embeddings(provider, prompts)

        print(f"\nGenerated {len(vectors)} embeddings")
        print(f"Provider: {provider}\n")

        for position, (prompt, vector) in enumerate(zip(prompts, vectors), start=1):
            print(f"{position}. {prompt[:50]}...")
            print(f" Dimension: {len(vector)}")
            print(f" First 3 values: {vector[:3]}")

    # NOTE(review): RateLimitError is handled first; presumably it is a
    # subclass of EmbeddingError, in which case the order matters - confirm.
    except RateLimitError as err:
        print(f"Rate limit exceeded: {err}")
        print("Tenacity automatic retry failed. Consider reducing batch size or waiting.")
    except EmbeddingError as err:
        print(f"Error: {err}")
75
+
76
+
77
def example_multiple_providers():
    """Embed the same sentence with several provider strings and print each.

    Every provider is tried independently: an ``EmbeddingError`` for one
    provider is printed and the loop moves on to the next.
    """
    banner = "=" * 80
    print("\n" + banner)
    print("MULTIPLE PROVIDERS EXAMPLE")
    print(banner)

    sentence = "Machine learning is transforming software development"
    provider_names = (
        "openai:text-embedding-3-small",
        "openai:text-embedding-3-large",
        "openai:text-embedding-ada-002",
    )

    print(f"\nText: {sentence}\n")

    for name in provider_names:
        try:
            vector = generate_embeddings(name, sentence)
            declared_size = get_embedding_dimension(name)

            print(f"Provider: {name}")
            print(f" Dimension: {declared_size}")
            print(f" First 3 values: {vector[:3]}\n")

        except EmbeddingError as err:
            print(f"Provider: {name}")
            print(f" Error: {err}\n")
105
+
106
+
107
def example_error_handling():
    """Trigger three calls that are expected to raise ``ValueError``.

    Each call is attempted in turn; when it raises ``ValueError`` the message
    is printed with a label. Any other exception type propagates, and a call
    that does not raise prints nothing.
    """
    banner = "=" * 80
    print("\n" + banner)
    print("ERROR HANDLING EXAMPLE")
    print(banner)

    # (label used in the printed message, zero-argument call expected to fail)
    failing_calls = (
        # Provider string without the "provider:model" shape
        ("Invalid format", lambda: generate_embeddings("invalid_format", "test")),
        # Empty list of texts
        ("Empty input", lambda: generate_embeddings("openai:text-embedding-3-small", [])),
        # Model name the dimension lookup does not know
        ("Unknown model", lambda: get_embedding_dimension("openai:unknown-model")),
    )

    for label, call in failing_calls:
        try:
            call()
        except ValueError as err:
            print(f"\n{label} error (expected): {err}")
130
+
131
+
132
def example_postgres_integration():
    """Print a copy-and-paste pattern for using embeddings in PostgresService.

    Nothing is executed against a database; the function only prints sample
    source code for two methods (single-record and batch embedding storage).

    The sample is emitted with proper Python indentation so it remains valid
    code when copied out of the terminal.
    """
    print("\n" + "=" * 80)
    print("POSTGRES INTEGRATION PATTERN")
    print("=" * 80)

    # NOTE(review): the sample below interpolates table_name / text_column /
    # batch_size straight into SQL text. Identifiers cannot be bound as query
    # parameters, so callers of such methods must only pass trusted,
    # validated names.
    print(
        """
# In PostgresService class:

async def generate_and_store_embedding(
    self,
    table_name: str,
    record_id: str,
    text_content: str,
    embedding_provider: str = "openai:text-embedding-3-small"
) -> None:
    '''
    Generate embedding for text content and store in database.

    Args:
        table_name: Table containing the record
        record_id: ID of the record to update
        text_content: Text to embed
        embedding_provider: Provider and model for embeddings
    '''
    from rem.utils.embeddings import generate_embeddings, get_embedding_dimension

    # Generate embedding
    embedding = generate_embeddings(embedding_provider, text_content)

    # Get dimension for vector column
    dimension = get_embedding_dimension(embedding_provider)

    # Ensure vector column exists
    await self.execute(f'''
        ALTER TABLE {table_name}
        ADD COLUMN IF NOT EXISTS embedding vector({dimension})
    ''')

    # Store embedding
    await self.execute(
        f'''
        UPDATE {table_name}
        SET embedding = $1::vector
        WHERE id = $2
        ''',
        embedding,
        record_id
    )


async def batch_generate_embeddings(
    self,
    table_name: str,
    text_column: str = "content",
    embedding_provider: str = "openai:text-embedding-3-small",
    batch_size: int = 100
) -> None:
    '''
    Generate embeddings for all records in a table (batch processing).

    Args:
        table_name: Table to process
        text_column: Column containing text to embed
        embedding_provider: Provider and model for embeddings
        batch_size: Number of records to process per batch
    '''
    from rem.utils.embeddings import generate_embeddings, get_embedding_dimension

    # Get dimension
    dimension = get_embedding_dimension(embedding_provider)

    # Ensure vector column exists
    await self.execute(f'''
        ALTER TABLE {table_name}
        ADD COLUMN IF NOT EXISTS embedding vector({dimension})
    ''')

    # Get all records without embeddings
    records = await self.fetch_all(f'''
        SELECT id, {text_column}
        FROM {table_name}
        WHERE embedding IS NULL
        LIMIT {batch_size}
    ''')

    if not records:
        return

    # Extract texts and IDs
    texts = [record[text_column] for record in records]
    ids = [record['id'] for record in records]

    # Generate embeddings in batch
    embeddings = generate_embeddings(embedding_provider, texts)

    # Store embeddings
    for record_id, embedding in zip(ids, embeddings):
        await self.execute(
            f'''
            UPDATE {table_name}
            SET embedding = $1::vector
            WHERE id = $2
            ''',
            embedding,
            record_id
        )
"""
    )
246
+
247
+
248
if __name__ == "__main__":
    # Script entry point: verify an API key is configured, run every example,
    # then print a best-practices summary.
    # NOTE: Requires OPENAI_API_KEY or LLM__OPENAI_API_KEY environment variable

    # Check if API key is available
    import os

    if not (os.getenv("OPENAI_API_KEY") or os.getenv("LLM__OPENAI_API_KEY")):
        print("=" * 80)
        print("SETUP REQUIRED")
        print("=" * 80)
        print("\nTo run these examples, set your OpenAI API key:")
        print(" export OPENAI_API_KEY='sk-...'")
        print(" # OR")
        print(" export LLM__OPENAI_API_KEY='sk-...'")
        print("\nThen run:")
        print(" python embeddings_example.py")
        # The bare `exit` name is an interactive helper installed by the
        # `site` module and is absent under `python -S` or in frozen apps;
        # raising SystemExit always works and yields the same exit status.
        raise SystemExit(1)

    # Run examples (comment out if you don't want to make API calls)
    example_single_embedding()
    example_batch_embeddings()
    example_multiple_providers()
    example_error_handling()
    example_postgres_integration()

    print("\n" + "=" * 80)
    print("BEST PRACTICES")
    print("=" * 80)
    # Plain (non-f) string: "{dimension}" below is printed literally.
    print(
        """
1. Batch Processing:
- Process multiple texts in a single API call (up to 2048 for OpenAI)
- Reduces API overhead and stays within rate limits (RPM)
- Example: generate_embeddings(provider, [text1, text2, ...])

2. Rate Limit Handling:
- Uses tenacity library for automatic exponential backoff (default: 1 retry)
- Adjust max_retries parameter if needed (default: 1)
- Monitor your usage and adjust batch_size accordingly
- Consider implementing a queue for large-scale processing

3. Error Handling:
- Catch EmbeddingError for general API errors
- Catch RateLimitError for rate limit specific handling
- Validate embedding_provider format before batch processing

4. Cost Optimization:
- OpenAI text-embedding-3-small: $0.02 / 1M tokens
- OpenAI text-embedding-3-large: $0.13 / 1M tokens
- Use smaller models unless you need higher accuracy

5. PostgreSQL Integration:
- Use vector({dimension}) column type with pgvector extension
- Create indexes: CREATE INDEX ON table USING ivfflat (embedding vector_cosine_ops)
- For similarity search: ORDER BY embedding <=> query_vector LIMIT 10
"""
    )
@@ -0,0 +1,202 @@
1
+ """
2
+ Example usage of sql_types utility for generating PostgreSQL schema from Pydantic models.
3
+
4
+ This demonstrates how REM entity models are mapped to PostgreSQL types.
5
+ """
6
+
7
+ from datetime import datetime
8
+ from uuid import UUID
9
+
10
+ from pydantic import BaseModel, Field
11
+
12
+ from rem.utils.sql_types import (
13
+ get_column_definition,
14
+ get_sql_type,
15
+ model_to_create_table,
16
+ model_to_upsert,
17
+ )
18
+
19
+
20
def _naive_utc_now() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Stand-in for ``datetime.utcnow()``, which is deprecated since Python 3.12.
    The tzinfo is stripped so the value is the same kind of object (naive,
    UTC wall-clock) that ``utcnow()`` produced.
    """
    # Function-scope import keeps this block self-contained; the module only
    # imports `datetime` from the datetime package.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


# Example 1: CoreModel with various field types
class CoreModel(BaseModel):
    """Base model demonstrating all common field types.

    Used by the demonstrations in this module as the source of ``FieldInfo``
    objects passed to the SQL type helpers. Field declaration order is kept
    as-is because the fields are read reflectively via ``model_fields``.
    """

    # ID - Union type, should prefer UUID
    id: UUID | str = Field(..., description="Unique identifier")

    # Timestamps (naive UTC, filled in at instantiation time)
    created_at: datetime = Field(default_factory=_naive_utc_now)
    updated_at: datetime = Field(default_factory=_naive_utc_now)

    # Optional tenant/user fields
    tenant_id: str | None = Field(default=None, description="Tenant identifier")
    user_id: str | None = Field(default=None, description="User identifier")

    # JSONB fields
    graph_edges: list[dict] = Field(default_factory=list, description="Graph edges")
    metadata: dict = Field(default_factory=dict, description="Flexible metadata")

    # Array fields
    tags: list[str] = Field(default_factory=list, description="Tags")

    # Database schema metadata
    column: dict = Field(default_factory=dict, description="Column metadata")
44
+
45
+
46
+ # Example 2: Resource with content fields
47
class Resource(BaseModel):
    """Resource entity with long-form text fields.

    The trailing comment on each field records the SQL type this example
    expects the field to map to.
    """

    id: str
    name: str  # VARCHAR(256)
    uri: str | None = None  # VARCHAR(256), nullable
    content: str = ""  # TEXT (long-form field name)
    description: str | None = None  # TEXT (long-form field name)
    category: str | None = None  # VARCHAR(256)
    related_entities: list[dict] = Field(default_factory=list)  # JSONB
57
+
58
+
59
+ # Example 3: Schema with embedding provider
60
class Schema(BaseModel):
    """Schema with embedding field.

    ``content`` carries an ``embedding_provider`` entry in its
    ``json_schema_extra``; per the inline note this is expected to force a
    TEXT column.
    """

    id: str
    name: str
    content: str = Field(
        default="",
        json_schema_extra={
            "embedding_provider": "openai:text-embedding-3-small"  # Forces TEXT
        },
    )
    # Required field (no default): the JSON schema specification itself.
    spec: dict = Field(..., description="JSON schema specification")  # JSONB
    category: str | None = None
73
+
74
+
75
+ # Example 4: Custom SQL type override
76
class CustomModel(BaseModel):
    """Model with custom SQL type specification.

    ``vector_data`` declares an explicit ``sql_type`` in its
    ``json_schema_extra`` instead of relying on the annotation-based mapping.
    """

    id: str
    vector_data: list[float] = Field(
        default_factory=list,
        json_schema_extra={"sql_type": "vector(1536)"},  # Custom pgvector type
    )
    # No override here: uses whatever mapping the helpers apply to `dict`.
    json_data: dict = Field(default_factory=dict)
85
+
86
+
87
def demonstrate_field_mapping():
    """Print the SQL type ``get_sql_type`` reports for a set of sample fields.

    Each case names a model, one of its fields, and a human-readable
    description of the expected mapping; the description is printed followed
    by the type actually returned.
    """
    banner = "=" * 80
    print(banner)
    print("FIELD TYPE MAPPING EXAMPLES")
    print(banner)

    # (model owning the field, field name, description printed above the result)
    cases = [
        (CoreModel, "id", "Union[UUID, str] -> UUID (prefers UUID in unions)"),
        (CoreModel, "created_at", "datetime -> TIMESTAMP"),
        (CoreModel, "tenant_id", "str | None -> VARCHAR(256)"),
        (CoreModel, "graph_edges", "list[dict] -> JSONB"),
        (CoreModel, "metadata", "dict -> JSONB"),
        (CoreModel, "tags", "list[str] -> TEXT[]"),
        (Resource, "content", "str (field name 'content') -> TEXT"),
        (Resource, "name", "str -> VARCHAR(256)"),
        (
            Schema,
            "content",
            "str with embedding_provider (openai:text-embedding-3-small) -> TEXT",
        ),
        (
            CustomModel,
            "vector_data",
            "list[float] with sql_type -> vector(1536)",
        ),
    ]

    for model, field_name, description in cases:
        field_info = model.model_fields[field_name]
        mapped_type = get_sql_type(field_info, field_name)
        print(f"\n{description}")
        print(f" SQL Type: {mapped_type}")
118
+
119
+
120
def demonstrate_column_definitions():
    """Print the column definition built for several ``CoreModel`` fields.

    Each case supplies the field name plus the ``nullable`` and primary-key
    flags that are forwarded to ``get_column_definition``.
    """
    banner = "=" * 80
    print("\n" + banner)
    print("COLUMN DEFINITION EXAMPLES")
    print(banner)

    # (field name, nullable, is primary key, label printed above the result)
    cases = [
        ("id", False, True, "Primary key"),
        ("created_at", False, False, "Required timestamp"),
        ("tenant_id", True, False, "Optional tenant"),
        ("metadata", False, False, "JSONB with default"),
        ("tags", False, False, "Array with default"),
    ]

    for field_name, nullable, is_pk, label in cases:
        field_info = CoreModel.model_fields[field_name]
        definition = get_column_definition(field_info, field_name, nullable, is_pk)
        print(f"\n{label}:")
        print(f" {definition}")
138
+
139
+
140
def demonstrate_create_table():
    """Print the output of ``model_to_create_table`` for two sample models."""
    banner = "=" * 80
    print("\n" + banner)
    print("CREATE TABLE EXAMPLES")
    print(banner)

    # (heading printed first, model class, target table name)
    targets = (
        ("\n-- Resource Table", Resource, "resources"),
        ("\n\n-- Schema Table", Schema, "schemas"),
    )
    for heading, model, table_name in targets:
        print(heading)
        print(model_to_create_table(model, table_name))
153
+
154
+
155
def demonstrate_upsert():
    """Print the output of ``model_to_upsert`` for two sample models."""
    banner = "=" * 80
    print("\n" + banner)
    print("UPSERT EXAMPLES")
    print(banner)

    # (heading printed first, model class, target table name)
    targets = (
        ("\n-- Resource Upsert", Resource, "resources"),
        ("\n-- Schema Upsert", Schema, "schemas"),
    )
    for heading, model, table_name in targets:
        print(heading)
        print(model_to_upsert(model, table_name))
166
+
167
+
168
if __name__ == "__main__":
    # Script entry point: run every demonstration, then print a usage snippet.
    demonstrate_field_mapping()
    demonstrate_column_definitions()
    demonstrate_create_table()
    demonstrate_upsert()

    print("\n" + "=" * 80)
    print("USAGE IN CODE")
    print("=" * 80)
    # The snippet is printed verbatim; its loop body is indented so the text
    # is valid Python when copied.
    print(
        """
# Generate schema for all REM entities
from rem.models.entities import Resource, Message, User, File, Moment, Schema
from rem.utils.sql_types import model_to_create_table

for model, table_name in [
    (Resource, "resources"),
    (Message, "messages"),
    (User, "users"),
    (File, "files"),
    (Moment, "moments"),
    (Schema, "schemas"),
]:
    sql = model_to_create_table(model, table_name)
    print(sql)
    print()

# Generate upsert for inserting/updating entities
from rem.utils.sql_types import model_to_upsert

upsert_sql = model_to_upsert(Resource, "resources")
# Use with psycopg:
# cursor.execute(upsert_sql, (id, name, uri, content, ...))
"""
    )
rem/utils/markdown.py ADDED
@@ -0,0 +1,16 @@
1
+ """Markdown conversion utilities for document processing."""
2
+
3
+
4
def to_markdown(content: str, filename: str) -> str:
    """
    Wrap extracted text in a minimal markdown document.

    The result is a level-one heading containing the filename, one blank
    line, and then the content unchanged.

    Args:
        content: Extracted text content
        filename: Source filename, used verbatim as the heading text

    Returns:
        Markdown string of the form "# <filename>\\n\\n<content>"
    """
    heading = f"# {filename}\n"
    # A second newline separates the heading from the body with a blank line.
    return heading + "\n" + content