corpus-forge 0.1.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. corpus_forge/__init__.py +3 -0
  2. corpus_forge/__main__.py +6 -0
  3. corpus_forge/_http.py +193 -0
  4. corpus_forge/_ml_device.py +57 -0
  5. corpus_forge/alembic/__init__.py +0 -0
  6. corpus_forge/alembic/env.py +120 -0
  7. corpus_forge/alembic/script.py.mako +30 -0
  8. corpus_forge/alembic/versions/.gitkeep +0 -0
  9. corpus_forge/alembic/versions/0001_core.py +353 -0
  10. corpus_forge/alembic/versions/0002_chunk_content_hash.py +53 -0
  11. corpus_forge/alembic/versions/0003_views.py +74 -0
  12. corpus_forge/alembic/versions/0004_sync.py +105 -0
  13. corpus_forge/alembic/versions/0005_fts.py +75 -0
  14. corpus_forge/alembic/versions/0006_writes_and_feedback.py +135 -0
  15. corpus_forge/alembic/versions/0007_chat_templates.py +60 -0
  16. corpus_forge/alembic/versions/0008_feedback_sessions.py +86 -0
  17. corpus_forge/alembic/versions/0009_feedback_host_default.py +70 -0
  18. corpus_forge/alembic/versions/0010_document_label_confidence.py +57 -0
  19. corpus_forge/alembic/versions/0011_image_embeddings.py +69 -0
  20. corpus_forge/backends/base.py +285 -0
  21. corpus_forge/backends/postgres.py +2655 -0
  22. corpus_forge/backends/sqlite.py +3225 -0
  23. corpus_forge/backends/sqlite_vec_loader.py +47 -0
  24. corpus_forge/chunkers/__init__.py +39 -0
  25. corpus_forge/chunkers/base.py +216 -0
  26. corpus_forge/chunkers/cdc.py +241 -0
  27. corpus_forge/chunkers/code.py +465 -0
  28. corpus_forge/chunkers/conversation.py +9 -0
  29. corpus_forge/chunkers/markdown.py +9 -0
  30. corpus_forge/classifiers/__init__.py +137 -0
  31. corpus_forge/classifiers/base.py +202 -0
  32. corpus_forge/classifiers/llm.py +254 -0
  33. corpus_forge/classifiers/registry.py +113 -0
  34. corpus_forge/classifiers/rule_based.py +342 -0
  35. corpus_forge/cli.py +1914 -0
  36. corpus_forge/config.py +624 -0
  37. corpus_forge/curation/__init__.py +36 -0
  38. corpus_forge/curation/prompts.py +54 -0
  39. corpus_forge/curation/selector.py +774 -0
  40. corpus_forge/daemon.py +77 -0
  41. corpus_forge/doctor/__init__.py +34 -0
  42. corpus_forge/doctor/checks.py +146 -0
  43. corpus_forge/embed.py +320 -0
  44. corpus_forge/embedders/base.py +62 -0
  45. corpus_forge/embedders/clip_local.py +123 -0
  46. corpus_forge/embedders/clip_remote.py +133 -0
  47. corpus_forge/embedders/multimodal.py +83 -0
  48. corpus_forge/embedders/openai.py +133 -0
  49. corpus_forge/embedders/registry.py +58 -0
  50. corpus_forge/embedders/sentence_transformers.py +124 -0
  51. corpus_forge/enrichers/__init__.py +157 -0
  52. corpus_forge/enrichers/base.py +319 -0
  53. corpus_forge/enrichers/qwen_local.py +180 -0
  54. corpus_forge/enrichers/qwen_remote.py +195 -0
  55. corpus_forge/enrichers/registry.py +61 -0
  56. corpus_forge/estimate.py +601 -0
  57. corpus_forge/eval/__init__.py +39 -0
  58. corpus_forge/eval/dataset.py +167 -0
  59. corpus_forge/eval/datasets/forge_self.corpus.md +84 -0
  60. corpus_forge/eval/datasets/forge_self.jsonl +28 -0
  61. corpus_forge/eval/metrics.py +171 -0
  62. corpus_forge/eval/runner.py +309 -0
  63. corpus_forge/export.py +251 -0
  64. corpus_forge/exports/huggingface.py +66 -0
  65. corpus_forge/extractors/__init__.py +16 -0
  66. corpus_forge/extractors/audio.py +86 -0
  67. corpus_forge/extractors/base.py +89 -0
  68. corpus_forge/extractors/code.py +243 -0
  69. corpus_forge/extractors/csv.py +74 -0
  70. corpus_forge/extractors/epub.py +73 -0
  71. corpus_forge/extractors/html.py +47 -0
  72. corpus_forge/extractors/image.py +98 -0
  73. corpus_forge/extractors/notebook.py +82 -0
  74. corpus_forge/extractors/office.py +82 -0
  75. corpus_forge/extractors/passthrough.py +43 -0
  76. corpus_forge/extractors/pdf.py +427 -0
  77. corpus_forge/extractors/plaintext.py +54 -0
  78. corpus_forge/extractors/registry.py +307 -0
  79. corpus_forge/extractors/structured.py +122 -0
  80. corpus_forge/extractors/subtitle.py +76 -0
  81. corpus_forge/extractors/video.py +157 -0
  82. corpus_forge/identity.py +30 -0
  83. corpus_forge/ingest.py +634 -0
  84. corpus_forge/mcp/__init__.py +25 -0
  85. corpus_forge/mcp/server.py +1557 -0
  86. corpus_forge/mcp/templates.py +270 -0
  87. corpus_forge/mcp/transport.py +33 -0
  88. corpus_forge/mcp/writes.py +706 -0
  89. corpus_forge/py.typed +0 -0
  90. corpus_forge/retrieval/__init__.py +29 -0
  91. corpus_forge/retrieval/fusion.py +89 -0
  92. corpus_forge/retrieval/normalize.py +72 -0
  93. corpus_forge/retrieval/rerank/__init__.py +35 -0
  94. corpus_forge/retrieval/rerank/base.py +64 -0
  95. corpus_forge/retrieval/rerank/cross_encoder.py +201 -0
  96. corpus_forge/retrieval/rerank/ollama.py +192 -0
  97. corpus_forge/retrieval/retriever.py +231 -0
  98. corpus_forge/retrieval/types.py +85 -0
  99. corpus_forge/schema/migrate.py +134 -0
  100. corpus_forge/schema/per_embedder.sql.tmpl +9 -0
  101. corpus_forge/setup/__init__.py +34 -0
  102. corpus_forge/setup/questions.toml +331 -0
  103. corpus_forge/setup/wizard.py +438 -0
  104. corpus_forge/sources/_flatten.py +66 -0
  105. corpus_forge/sources/_session_link.py +26 -0
  106. corpus_forge/sources/base.py +106 -0
  107. corpus_forge/sources/chatgpt_export.py +184 -0
  108. corpus_forge/sources/claude_code.py +119 -0
  109. corpus_forge/sources/codex_cli.py +90 -0
  110. corpus_forge/sources/filesystem.py +243 -0
  111. corpus_forge/sources/gemini_cli.py +98 -0
  112. corpus_forge/sources/jsonl_chat.py +94 -0
  113. corpus_forge/sources/markdown_vault.py +74 -0
  114. corpus_forge/sources/opencode.py +92 -0
  115. corpus_forge/sync/__init__.py +5 -0
  116. corpus_forge/sync/cloud.py +67 -0
  117. corpus_forge/sync/conflicts.py +75 -0
  118. corpus_forge/sync/echo.py +78 -0
  119. corpus_forge/sync/engine.py +59 -0
  120. corpus_forge/sync/fs.py +99 -0
  121. corpus_forge/sync/pull.py +155 -0
  122. corpus_forge/sync/push.py +248 -0
  123. corpus_forge/templates/__init__.py +86 -0
  124. corpus_forge/templates/builtins/__init__.py +1 -0
  125. corpus_forge/templates/builtins/alpaca.py +23 -0
  126. corpus_forge/templates/builtins/chatml.py +19 -0
  127. corpus_forge/templates/builtins/gemma.py +21 -0
  128. corpus_forge/templates/builtins/llama3.py +22 -0
  129. corpus_forge/templates/builtins/qwen.py +19 -0
  130. corpus_forge/templates/builtins/vicuna.py +23 -0
  131. corpus_forge/templates/hf.py +39 -0
  132. corpus_forge/templates/tools.py +25 -0
  133. corpus_forge/update/__init__.py +30 -0
  134. corpus_forge/update/channels.py +204 -0
  135. corpus_forge/update/version_check.py +202 -0
  136. corpus_forge/vlm/__init__.py +45 -0
  137. corpus_forge/vlm/base.py +146 -0
  138. corpus_forge/vlm/mistral.py +127 -0
  139. corpus_forge/vlm/ollama.py +138 -0
  140. corpus_forge/vlm/registry.py +143 -0
  141. corpus_forge/whisper/__init__.py +45 -0
  142. corpus_forge/whisper/base.py +138 -0
  143. corpus_forge/whisper/local.py +186 -0
  144. corpus_forge/whisper/registry.py +145 -0
  145. corpus_forge/whisper/remote.py +109 -0
  146. corpus_forge-0.1.0b2.dist-info/METADATA +758 -0
  147. corpus_forge-0.1.0b2.dist-info/RECORD +150 -0
  148. corpus_forge-0.1.0b2.dist-info/WHEEL +4 -0
  149. corpus_forge-0.1.0b2.dist-info/entry_points.txt +2 -0
  150. corpus_forge-0.1.0b2.dist-info/licenses/LICENSE +202 -0
@@ -0,0 +1,3 @@
1
+ """Corpus Forge — chat with your data, build a living trainable corpus."""
2
+
3
+ __version__ = "0.1.0b2"
@@ -0,0 +1,6 @@
1
+ """Entry point for python -m corpus_forge"""
2
+
3
from .cli import app

# Invoke ``app`` only when this module is executed as the main program
# (``python -m corpus_forge``); importing it has no side effect beyond
# the import of ``.cli`` above.
if __name__ == "__main__":
    app()
corpus_forge/_http.py ADDED
@@ -0,0 +1,193 @@
1
+ """Shared HTTP transport for remote model-backend clients.
2
+
3
+ Every remote model integration in corpus-forge (VLM, Whisper, code
4
+ enricher, LLM classifier, multi-modal embedder) speaks to a JSON HTTP
5
+ endpoint and maps the same set of failure modes onto a family-specific
6
+ error triad — ``<Family>UnavailableError`` / ``TimeoutError`` /
7
+ ``ResponseError``.
8
+
9
+ This module owns the mapping in one place. Each family declares the
10
+ triad with an :class:`HttpErrors` bundle and calls :func:`request_json`,
11
+ which:
12
+
13
+ - catches the standard ``requests`` exception ladder (``Timeout`` /
14
+ ``ConnectionError`` / ``RequestException``) and raises the matching
15
+ family-typed error;
16
+ - optionally promotes ``401``/``403`` to the family's "unavailable"
17
+ bucket (API-key rejection is a configuration failure, not a flake);
18
+ - treats non-2xx HTTP, malformed JSON, non-object JSON, and missing
19
+ required keys as response errors with a truncated body snippet in the
20
+ message.
21
+
22
+ Tests mock ``requests.post`` / ``requests.get`` directly. This module
23
+ calls them by name (not via ``requests.request``) so existing
24
+ ``patch("requests.post", ...)`` contracts continue to work.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ from collections.abc import Mapping, Sequence
30
+ from dataclasses import dataclass
31
+ from typing import Any, Literal
32
+
33
+ __all__ = ["HttpErrors", "bearer_headers", "request_json"]
34
+
35
+ # Snippet length for response bodies inside error messages. Long enough
36
+ # to be informative, short enough to keep audit logs scannable.
37
+ _BODY_SNIPPET = 200
38
+
39
+ Method = Literal["GET", "POST"]
40
+
41
+
42
@dataclass(frozen=True)
class HttpErrors:
    """Immutable bundle of the three exception classes one family raises.

    A family module builds one instance at import time, for example
    ``_ERR = HttpErrors(VLMUnavailableError, VLMTimeoutError,
    VLMResponseError)``, and hands it to :func:`request_json`, which then
    raises the family-typed exception for each transport failure.
    """

    # Raised for connection failures, rejected API keys (401/403) and,
    # in health-check mode, every other failure as well.
    unavailable: type[BaseException]
    # Raised when a body call exceeds its HTTP time budget.
    timeout: type[BaseException]
    # Raised for non-2xx replies, malformed / non-object JSON and
    # missing required keys on a body call.
    response: type[BaseException]
55
+
56
+
57
def bearer_headers(
    api_key: str | None, *, extra: Mapping[str, str] | None = None
) -> dict[str, str]:
    """Return request headers, adding ``Authorization: Bearer <key>`` if keyed.

    A falsy ``api_key`` (``None`` or ``""``) leaves the ``Authorization``
    header out altogether, which is what an open endpoint such as a
    hosted Ollama without auth expects. Entries from ``extra`` are
    applied last, so they win over the generated header on a key clash.
    """
    auth = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    # ``extra`` is merged second so caller-supplied values take precedence.
    return {**auth, **(extra or {})}
72
+
73
+
74
def _snippet(text: str | None) -> str:
    """Return at most ``_BODY_SNIPPET`` leading characters of ``text``.

    ``None`` and the empty string both yield ``""``.
    """
    if not text:
        return ""
    return text[:_BODY_SNIPPET]
76
+
77
+
78
def request_json(
    method: Method,
    url: str,
    *,
    timeout_s: float,
    errors: HttpErrors,
    label: str,
    base_url: str | None = None,
    json_body: Mapping[str, Any] | None = None,
    files: Mapping[str, Any] | None = None,
    data: Mapping[str, Any] | None = None,
    headers: Mapping[str, str] | None = None,
    api_key: str | None = None,
    required_keys: Sequence[str] = (),
    auth_to_unavailable: bool = True,
    health_check: bool = False,
) -> dict[str, Any]:
    """Send one HTTP request and hand back the decoded JSON object.

    Args:
        method: ``"POST"`` goes through ``requests.post``; anything else
            goes through ``requests.get``. The functions are looked up by
            name so ``patch("requests.post", ...)`` in tests keeps working.
        url: The complete request URL.
        timeout_s: Time budget passed to ``requests`` as ``timeout``.
        errors: The family's :class:`HttpErrors` triad to raise from.
        label: Human-readable endpoint name used in error messages
            (e.g. ``"Ollama generate"``, ``"Mistral OCR"``).
        base_url: URL shown in connection-failure messages; falls back
            to ``url`` when ``None``.
        json_body: Mapping sent as the JSON request body.
        files: Multipart upload files.
        data: Multipart form-data fields.
        headers: Additional request headers, merged over the generated
            ``Authorization`` header.
        api_key: Bearer token; ``None`` or empty omits the header.
        required_keys: Top-level keys the decoded object must contain.
        auth_to_unavailable: When True (default), a 401/403 reply raises
            ``errors.unavailable`` ("API key rejected"). Pass False for
            endpoints that have no auth.
        health_check: Probe mode. When True, timeouts, non-2xx replies,
            malformed JSON and missing keys all raise
            ``errors.unavailable`` rather than the timeout / response
            classes, so a warmup probe has a single failure type.

    Returns:
        The decoded top-level JSON object, always a ``dict``.

    Raises:
        errors.unavailable: Connection failure, any other
            ``requests.RequestException``, a 401/403 reply (when
            ``auth_to_unavailable``), or any failure in probe mode.
        errors.timeout: ``requests.Timeout`` when ``health_check`` is False.
        errors.response: Non-2xx status, malformed JSON, non-object JSON
            or a missing required key when ``health_check`` is False.
    """
    import requests  # noqa: PLC0415 — lazy: every model backend keeps `requests` optional

    call_kwargs: dict[str, Any] = {
        "headers": bearer_headers(api_key, extra=headers),
        "timeout": timeout_s,
    }
    # Only forward the payload parts the caller actually supplied; each is
    # copied into a plain dict before being handed to ``requests``.
    for name, part in (("json", json_body), ("files", files), ("data", data)):
        if part is not None:
            call_kwargs[name] = dict(part)

    shown_base = url if base_url is None else base_url

    if method == "POST":
        send = requests.post
    else:
        send = requests.get

    # Probe mode folds every body-level failure into the "unavailable"
    # class; the validation steps below all raise through this one name.
    if health_check:
        body_error = errors.unavailable
    else:
        body_error = errors.response

    try:
        resp = send(url, **call_kwargs)
    except requests.Timeout as exc:
        if not health_check:
            raise errors.timeout(f"{label} exceeded {timeout_s}s budget at {url}") from exc
        raise errors.unavailable(
            f"{label} at {shown_base} did not respond within {timeout_s}s — is it reachable?"
        ) from exc
    except requests.ConnectionError as exc:
        raise errors.unavailable(f"Cannot connect to {label} at {shown_base}: {exc}") from exc
    except requests.RequestException as exc:
        raise errors.unavailable(f"{label} request failed: {exc}") from exc

    if auth_to_unavailable and resp.status_code in (401, 403):
        raise errors.unavailable(
            f"{label} API key rejected (HTTP {resp.status_code}): {_snippet(resp.text)}"
        )
    if not resp.ok:
        raise body_error(f"HTTP {resp.status_code}: {_snippet(resp.text)}")

    try:
        payload = resp.json()
    except ValueError as exc:
        raise body_error(f"Malformed JSON from {label}: {_snippet(resp.text)}") from exc

    if not isinstance(payload, dict):
        raise body_error(f"{label} returned non-object JSON: {_snippet(str(payload))}")

    # Report the first required key that is absent, in caller order.
    for key in required_keys:
        if key in payload:
            continue
        raise body_error(f"{label} response missing {key!r} key: {_snippet(str(payload))}")

    return payload
@@ -0,0 +1,57 @@
1
+ """Shared device-detection helper for sentence-transformers-style backends.
2
+
3
+ Four call sites used to roll the same MPS → CUDA → CPU heuristic by
4
+ hand: :class:`SentenceTransformersEmbedder`, :class:`ClipLocalEmbedder`,
5
+ :class:`LocalWhisper`, and :class:`CrossEncoderReranker`. They now all
6
+ call :func:`detect_device`.
7
+
8
+ The single subtlety is :class:`LocalWhisper`: ``faster-whisper`` does
9
+ not yet support the MPS backend, so it disables the MPS branch via
10
+ ``prefer_mps=False``.
11
+
12
+ ``torch`` is imported lazily so this module is safe to import in
13
+ environments where the ML stack isn't installed (it returns ``"cpu"``
14
+ unconditionally in that case).
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ __all__ = ["detect_device", "resolve_device"]
20
+
21
+ _AUTO = "auto"
22
+
23
+
24
def detect_device(*, prefer_mps: bool = True) -> str:
    """Pick the best available concrete device.

    Args:
        prefer_mps: When True (default), Apple Silicon's Metal backend
            is preferred when available. Set False for libraries that
            don't yet support MPS (faster-whisper).

    Returns:
        ``"mps"`` (when ``prefer_mps`` and available), ``"cuda"``, or
        ``"cpu"``. Falls back to ``"cpu"`` when ``torch`` isn't
        importable so callers can still run on hosts without the ML
        stack installed.
    """
    try:
        import torch  # noqa: PLC0415
    except ImportError:
        return "cpu"
    if prefer_mps:
        # Older PyTorch builds do not ship ``torch.backends.mps`` at all;
        # look it up defensively so they fall through to the CUDA/CPU
        # checks instead of raising AttributeError.
        mps = getattr(torch.backends, "mps", None)
        if mps is not None and mps.is_available():
            return "mps"
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"
47
+
48
+
49
def resolve_device(device: str, *, prefer_mps: bool = True) -> str:
    """Turn the ``"auto"`` sentinel into a concrete device name.

    Explicit values such as ``"cpu"``, ``"cuda"`` or ``"mps"`` are passed
    through untouched; only ``"auto"`` triggers :func:`detect_device`,
    to which ``prefer_mps`` is forwarded.
    """
    if device != _AUTO:
        return device
    return detect_device(prefer_mps=prefer_mps)
File without changes
@@ -0,0 +1,120 @@
1
+ """Alembic migration environment.
2
+
3
+ Dialect-aware configuration:
4
+ - SQLite → render_as_batch=True (no DDL transactional ALTER support)
5
+ - Postgres → version_table_schema="corpus"
6
+
7
+ All operator-facing messages go through the ``alembic.runtime.migration``
8
+ logger (stderr). No print() calls.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import logging
14
+ import os
15
+
16
+ from alembic import context
17
+ from sqlalchemy import engine_from_config, pool
18
+
19
+ log = logging.getLogger("alembic.runtime.migration")
20
+
21
+
22
+ def _get_url() -> str:
23
+ """Resolve the database URL from env var or alembic.ini config.
24
+
25
+ Priority:
26
+ 1. ``DATABASE_URL`` environment variable (honoured by the legacy migrator)
27
+ 2. ``CORPUS_FORGE_DATABASE_URL`` environment variable
28
+ 3. ``sqlalchemy.url`` from the Alembic Config object
29
+ """
30
+ url = os.environ.get("DATABASE_URL") or os.environ.get("CORPUS_FORGE_DATABASE_URL")
31
+ if url:
32
+ return url
33
+ cfg_url: str = context.config.get_main_option("sqlalchemy.url", default="")
34
+ return cfg_url
35
+
36
+
37
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    This configures the context with just a URL and not an Engine; calls to
    context.execute() emit the given string to the script output.

    The Alembic version table is named ``alembic_version``; for Postgres
    URLs it is placed in the ``corpus`` schema.
    """
    url = _get_url()

    # "postgres" is a prefix of "postgresql", so one check covers both
    # URL spellings.
    is_postgres = url.startswith("postgres")

    configure_kwargs: dict = {
        "url": url,
        "target_metadata": None,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
        "version_table": "alembic_version",
    }
    if is_postgres:
        configure_kwargs["version_table_schema"] = "corpus"

    # The migration context must exist before a transaction can be opened:
    # ``context.begin_transaction()`` resolves the configured context and
    # raises "No context has been configured yet" otherwise.
    context.configure(**configure_kwargs)

    with context.begin_transaction():
        context.run_migrations()
61
+
62
+
63
def run_migrations_online() -> None:
    """Run migrations in 'online' mode against a live connection.

    The engine comes from one of two places: a ``creator`` callable stored
    in the Alembic config attributes, or the regular ``sqlalchemy.*``
    settings combined with the URL from :func:`_get_url`. SQLite
    connections are configured with ``render_as_batch=True``; every other
    dialect keeps its version table in the ``corpus`` schema.
    """
    creator = context.config.attributes.get("creator")
    if creator is None:
        connectable = engine_from_config(
            context.config.get_section(context.config.config_ini_section, {}),
            prefix="sqlalchemy.",
            poolclass=pool.NullPool,
            url=_get_url() or None,
        )
    else:
        # In-memory SQLite: the backend's shared-cache factory makes Alembic
        # work on the same in-memory database as the SQLiteBackend instance.
        connectable = engine_from_config(
            {},
            prefix="sqlalchemy.",
            poolclass=pool.NullPool,
            creator=creator,
            # SQLite dialect is inferred from the creator connection.
            url="sqlite+pysqlite://",
        )

    with connectable.connect() as connection:
        # Options shared by every dialect; the dialect-specific one is
        # added below.
        configure_kwargs: dict = {
            "connection": connection,
            "version_table": "alembic_version",
            "target_metadata": None,
        }
        dialect_name: str = connection.dialect.name
        if dialect_name == "sqlite":
            configure_kwargs["render_as_batch"] = True
        else:
            # Postgres (and any other dialect) — use corpus schema for version table.
            configure_kwargs["version_table_schema"] = "corpus"

        context.configure(**configure_kwargs)

        with context.begin_transaction():
            context.run_migrations()
106
+
107
+
108
# Module body: migrations run only when Alembic (CLI or programmatic runner)
# loads this file. A plain ``import corpus_forge.alembic.env`` (e.g. in tests)
# has no configured Alembic context, so the mode probe fails and the import
# completes without attempting any migration.
try:
    _offline = context.is_offline_mode()
except Exception:
    # Not running under Alembic's migration framework — plain import, skip.
    pass
else:
    (run_migrations_offline if _offline else run_migrations_online)()
@@ -0,0 +1,30 @@
1
+ """${message} # noqa: D
2
+
3
+ Revision ID: ${up_revision}
4
+ Revises: ${down_revision | comma,n}
5
+ Create Date: ${create_date}
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Union
11
+
12
+ from alembic import op
13
+ import sqlalchemy as sa
14
+ ${imports if imports else ""}
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = ${repr(up_revision)}
18
+ down_revision: Union[str, None] = ${repr(down_revision)}
19
+ branch_labels: Union[str, tuple[str, ...], None] = ${repr(branch_labels)}
20
+ depends_on: Union[str, tuple[str, ...], None] = ${repr(depends_on)}
21
+
22
+
23
## The body is the rendered ``upgrades`` value; when that is empty a bare
## ``pass`` is emitted so the generated function is still valid Python.
def upgrade() -> None:
    """Apply forward migrations."""
    ${upgrades if upgrades else "pass"}
26
+
27
+
28
## The body is the rendered ``downgrades`` value; when that is empty a bare
## ``pass`` is emitted so the generated function is still valid Python.
def downgrade() -> None:
    """Apply reverse migrations."""
    ${downgrades if downgrades else "pass"}
File without changes