fpu-barometer-admin 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. fpu_barometer_admin/__init__.py +6 -0
  2. fpu_barometer_admin/cli/__init__.py +5 -0
  3. fpu_barometer_admin/cli/commands.py +199 -0
  4. fpu_barometer_admin/cli/deploy.py +719 -0
  5. fpu_barometer_admin/connectors/__init__.py +56 -0
  6. fpu_barometer_admin/connectors/acled_connector.py +77 -0
  7. fpu_barometer_admin/connectors/base_connector.py +60 -0
  8. fpu_barometer_admin/connectors/cpj_connector.py +92 -0
  9. fpu_barometer_admin/connectors/ert_connector.py +134 -0
  10. fpu_barometer_admin/connectors/gdelt_connector.py +403 -0
  11. fpu_barometer_admin/connectors/mfrr_connector.py +171 -0
  12. fpu_barometer_admin/connectors/rr_connector.py +84 -0
  13. fpu_barometer_admin/connectors/static_sources.py +41 -0
  14. fpu_barometer_admin/connectors/vdem_connector.py +165 -0
  15. fpu_barometer_admin/handlers/__init__.py +6 -0
  16. fpu_barometer_admin/handlers/function_app.py +543 -0
  17. fpu_barometer_admin/processors/__init__.py +46 -0
  18. fpu_barometer_admin/processors/acled_processor.py +263 -0
  19. fpu_barometer_admin/processors/base_processor.py +23 -0
  20. fpu_barometer_admin/processors/cpj_processor.py +147 -0
  21. fpu_barometer_admin/processors/ert_processor.py +72 -0
  22. fpu_barometer_admin/processors/gdelt_processor.py +260 -0
  23. fpu_barometer_admin/processors/mfrr_processor.py +327 -0
  24. fpu_barometer_admin/processors/rr_processor.py +208 -0
  25. fpu_barometer_admin/processors/vdem_processor.py +70 -0
  26. fpu_barometer_admin/runners/__init__.py +19 -0
  27. fpu_barometer_admin/runners/definitions.py +159 -0
  28. fpu_barometer_admin/runners/runners.py +291 -0
  29. fpu_barometer_admin/runners/scheduler.py +148 -0
  30. fpu_barometer_admin/runners/seed.py +399 -0
  31. fpu_barometer_admin/schemas/__init__.py +1 -0
  32. fpu_barometer_admin/schemas/event.py +362 -0
  33. fpu_barometer_admin/schemas/predictor.py +418 -0
  34. fpu_barometer_admin/storage/__init__.py +39 -0
  35. fpu_barometer_admin/storage/catalog.py +359 -0
  36. fpu_barometer_admin/storage/factory.py +165 -0
  37. fpu_barometer_admin/storage/objects.py +463 -0
  38. fpu_barometer_admin/storage/reader.py +410 -0
  39. fpu_barometer_admin-0.3.0.dist-info/METADATA +27 -0
  40. fpu_barometer_admin-0.3.0.dist-info/RECORD +43 -0
  41. fpu_barometer_admin-0.3.0.dist-info/WHEEL +4 -0
  42. fpu_barometer_admin-0.3.0.dist-info/entry_points.txt +2 -0
  43. fpu_barometer_admin-0.3.0.dist-info/licenses/LICENSE.md +7 -0
@@ -0,0 +1,6 @@
1
+ """FPU Admin backend/admin implementation package."""
2
+
3
+ from .runners import DatasetRunner, DatasetRunResult
4
+
5
+ __version__ = "0.3.0"
6
+ __all__ = ["DatasetRunner", "DatasetRunResult"]
@@ -0,0 +1,5 @@
1
+ """Command-line interface for FPU admin pipeline."""
2
+
3
+ from .commands import cli
4
+
5
+ __all__ = ["cli"]
@@ -0,0 +1,199 @@
1
+ """CLI commands for the canonical FPU admin package."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from pathlib import Path
7
+
8
+ import click
9
+
10
+ from fpu_barometer_admin.cli.deploy import (
11
+ AzureCodeRedeployConfig,
12
+ AzureDeployConfig,
13
+ deploy_azure,
14
+ redeploy_azure_code,
15
+ )
16
+ from fpu_barometer_admin.runners import DatasetRunner
17
+ from fpu_barometer_admin.runners.seed import seed_gdelt_processed_artifact
18
+ from fpu_barometer_admin.storage import create_storage_runtime_from_env
19
+
20
+
21
@click.group()
@click.option("--data-path", default="./data", help="Path to data directory")
@click.pass_context
def cli(ctx: click.Context, data_path: str):
    """FPU Admin command line interface."""
    # NOTE: the docstring above is what click prints for `--help`; keep
    # implementation notes in comments so the help output stays unchanged.
    #
    # Make sure ctx.obj is a dict, then store a private copy of the process
    # environment with FPU_DATA_PATH overridden by --data-path. Subcommands in
    # this module read the mapping back via ctx.obj["runtime_env"].
    ctx.ensure_object(dict)
    ctx.obj["runtime_env"] = {
        **os.environ,
        "FPU_DATA_PATH": str(Path(data_path)),
    }
30
+
31
+
32
@cli.command("deploy-azure")
@click.option("--resource-group", default="fpu-research-rg", show_default=True)
@click.option("--location", default="westeurope", show_default=True)
@click.option("--environment", default="prod", show_default=True)
@click.option("--storage-container", default="datasets", show_default=True)
@click.option(
    "--project-root",
    type=click.Path(file_okay=False, path_type=Path),
    default=None,
    help="Project root containing pyproject.toml. Auto-detected when omitted.",
)
@click.option("--python-version", default="3.10", show_default=True)
def deploy_azure_command(
    resource_group: str,
    location: str,
    environment: str,
    storage_container: str,
    project_root: Path | None,
    python_version: str,
):
    """Deploy Barometer's Azure Functions API and Blob storage."""
    # NOTE: the docstring above is the command's `--help` text.
    #
    # Collect the CLI options into the config object that deploy_azure() takes.
    config = AzureDeployConfig(
        resource_group=resource_group,
        location=location,
        environment=environment,
        storage_container=storage_container,
        project_root=project_root,
        python_version=python_version,
    )
    outcome = deploy_azure(config)

    # Report the outcome as one `key=value` line per field, in a fixed order.
    reported_fields = (
        "function_app",
        "function_app_url",
        "storage_account",
        "storage_container",
        "config_path",
    )
    for field_name in reported_fields:
        click.echo(f"{field_name}={getattr(outcome, field_name)}")
69
+
70
+
71
@cli.command("redeploy-azure-code")
@click.option("--resource-group", default=None, help="Existing Azure resource group.")
@click.option("--function-app", default=None, help="Existing Azure Function App name.")
@click.option(
    "--project-root",
    type=click.Path(file_okay=False, path_type=Path),
    default=None,
    help="Project root containing pyproject.toml. Auto-detected when omitted.",
)
@click.option("--scheduler-cron", default="0 15 2 * * *", show_default=True)
def redeploy_azure_code_command(
    resource_group: str | None,
    function_app: str | None,
    project_root: Path | None,
    scheduler_cron: str,
):
    """Redeploy code to an existing Azure Function App without touching storage."""
    # NOTE: the docstring above is the command's `--help` text.
    #
    # Options left at None are passed through unchanged; how they are resolved
    # is up to redeploy_azure_code() and is not visible from this module.
    config = AzureCodeRedeployConfig(
        resource_group=resource_group,
        function_app=function_app,
        project_root=project_root,
        scheduler_cron=scheduler_cron,
    )
    outcome = redeploy_azure_code(config)

    # Report the outcome as one `key=value` line per field, in a fixed order.
    reported_fields = (
        "function_app",
        "function_app_url",
        "resource_group",
        "scheduler_cron",
        "config_path",
    )
    for field_name in reported_fields:
        click.echo(f"{field_name}={getattr(outcome, field_name)}")
102
+
103
+
104
@cli.command("seed-processed-artifact")
@click.argument("dataset")
@click.option(
    "--processed-artifact",
    "processed_path",
    required=True,
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    help="Local canonical processed GDELT Event Parquet artifact to publish to Azure.",
)
@click.option(
    "--watermark-after",
    required=True,
    help="Explicit GDELT watermark after the seeded period, as YYYYmmddHHMMSS.",
)
@click.option(
    "--azure-storage-connection-string",
    envvar="FPU_AZURE_STORAGE_CONNECTION_STRING",
    required=True,
    help="Azure Storage connection string for the target Barometer storage account.",
)
@click.option(
    "--azure-storage-container",
    envvar="FPU_AZURE_STORAGE_CONTAINER",
    default="datasets",
    show_default=True,
    help="Azure Blob container containing Barometer dataset objects and catalog records.",
)
@click.pass_context
def seed_processed_artifact(
    ctx: click.Context,
    dataset: str,
    processed_path: Path,
    watermark_after: str,
    azure_storage_connection_string: str,
    azure_storage_container: str,
):
    """Publish a local processed replacement artifact to Azure and update the catalog."""
    # NOTE: the docstring above is the command's `--help` text.
    #
    # Flow: validate the arguments, force the Azure storage backend in a copy
    # of the runtime environment, run the seeding routine, echo its key/value
    # report, and map the result status to an exit code:
    #   "failed"          -> exit 1
    #   "partial_success" -> exit 3
    #   anything else     -> normal exit
    # Raises click.BadParameter for an unsupported dataset, a malformed
    # watermark, or an artifact path that is not a .parquet file.

    # Local import so the block stays self-contained.
    from datetime import datetime

    if dataset != "gdelt":
        raise click.BadParameter("only 'gdelt' is supported", param_hint="dataset")

    # Exactly 14 ASCII digits. str.isdigit() alone also accepts non-ASCII
    # Unicode digits, hence the extra isascii() check.
    watermark_ok = (
        len(watermark_after) == 14
        and watermark_after.isascii()
        and watermark_after.isdigit()
    )
    if watermark_ok:
        # The digits must also form a real timestamp: reject values such as
        # month 13 or hour 99 before they are passed on as the watermark.
        try:
            datetime.strptime(watermark_after, "%Y%m%d%H%M%S")
        except ValueError:
            watermark_ok = False
    if not watermark_ok:
        raise click.BadParameter(
            "must be in YYYYmmddHHMMSS format", param_hint="--watermark-after"
        )

    if processed_path.suffix.lower() != ".parquet":
        raise click.BadParameter(
            "must point to a local processed .parquet file",
            param_hint="--processed-artifact",
        )

    # Work on a copy so the shared ctx.obj["runtime_env"] is not mutated.
    runtime_env = dict(ctx.obj["runtime_env"])
    runtime_env["FPU_STORAGE_BACKEND"] = "azure"
    runtime_env["FPU_AZURE_STORAGE_CONNECTION_STRING"] = azure_storage_connection_string
    runtime_env["FPU_AZURE_STORAGE_CONTAINER"] = azure_storage_container
    runtime = create_storage_runtime_from_env(runtime_env)

    result = seed_gdelt_processed_artifact(
        object_storage=runtime.object_storage,
        catalog=runtime.create_catalog(),
        processed_path=processed_path,
        watermark_after=watermark_after,
        reader_provider=runtime.get_reader,
    )

    # Always print the report first so it is visible even on a non-zero exit.
    for key, value in result.key_values():
        click.echo(f"{key}={value}")
    if result.status == "failed":
        raise click.exceptions.Exit(1)
    if result.status == "partial_success":
        raise click.exceptions.Exit(3)
172
+
173
+
174
def _run_static_dataset(ctx: click.Context, dataset: str) -> None:
    """Run one dataset through DatasetRunner and echo the result.

    Builds the storage runtime from ``ctx.obj["runtime_env"]``, runs
    ``dataset`` inside the runner's context manager, and then prints
    ``status``, ``run_id``, ``version_id`` and the comma-joined ``stages``
    as ``key=value`` lines.
    """
    runtime = create_storage_runtime_from_env(ctx.obj["runtime_env"])
    with DatasetRunner.from_runtime(runtime) as runner:
        result = runner.run_dataset(dataset)

    # Echo only after the runner context has exited, as the commands did
    # before this helper was extracted.
    click.echo(f"status={result.status}")
    click.echo(f"run_id={result.run_id}")
    click.echo(f"version_id={result.version_id}")
    click.echo(f"stages={','.join(result.stages)}")


@cli.command("run-acled-static")
@click.pass_context
def run_acled_static(ctx: click.Context):
    """Run the static ACLED Dataset Runner tracer."""
    # NOTE: the docstring above is the command's `--help` text.
    _run_static_dataset(ctx, "acled")


@cli.command("run-rr-static")
@click.pass_context
def run_rr_static(ctx: click.Context):
    """Run the static RR Dataset Runner tracer."""
    # NOTE: the docstring above is the command's `--help` text.
    _run_static_dataset(ctx, "rr")