fpu-barometer-admin 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. fpu_barometer_admin-0.3.0/.gitignore +9 -0
  2. fpu_barometer_admin-0.3.0/LICENSE.md +7 -0
  3. fpu_barometer_admin-0.3.0/PKG-INFO +27 -0
  4. fpu_barometer_admin-0.3.0/README.md +3 -0
  5. fpu_barometer_admin-0.3.0/pyproject.toml +51 -0
  6. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/__init__.py +6 -0
  7. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/cli/__init__.py +5 -0
  8. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/cli/commands.py +199 -0
  9. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/cli/deploy.py +719 -0
  10. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/connectors/__init__.py +56 -0
  11. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/connectors/acled_connector.py +77 -0
  12. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/connectors/base_connector.py +60 -0
  13. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/connectors/cpj_connector.py +92 -0
  14. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/connectors/ert_connector.py +134 -0
  15. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/connectors/gdelt_connector.py +403 -0
  16. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/connectors/mfrr_connector.py +171 -0
  17. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/connectors/rr_connector.py +84 -0
  18. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/connectors/static_sources.py +41 -0
  19. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/connectors/vdem_connector.py +165 -0
  20. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/handlers/__init__.py +6 -0
  21. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/handlers/function_app.py +543 -0
  22. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/processors/__init__.py +46 -0
  23. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/processors/acled_processor.py +263 -0
  24. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/processors/base_processor.py +23 -0
  25. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/processors/cpj_processor.py +147 -0
  26. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/processors/ert_processor.py +72 -0
  27. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/processors/gdelt_processor.py +260 -0
  28. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/processors/mfrr_processor.py +327 -0
  29. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/processors/rr_processor.py +208 -0
  30. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/processors/vdem_processor.py +70 -0
  31. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/runners/__init__.py +19 -0
  32. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/runners/definitions.py +159 -0
  33. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/runners/runners.py +291 -0
  34. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/runners/scheduler.py +148 -0
  35. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/runners/seed.py +399 -0
  36. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/schemas/__init__.py +1 -0
  37. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/schemas/event.py +362 -0
  38. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/schemas/predictor.py +418 -0
  39. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/storage/__init__.py +39 -0
  40. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/storage/catalog.py +359 -0
  41. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/storage/factory.py +165 -0
  42. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/storage/objects.py +463 -0
  43. fpu_barometer_admin-0.3.0/src/fpu_barometer_admin/storage/reader.py +410 -0
@@ -0,0 +1,9 @@
1
+ data/*
2
+ .env
3
+ .pytest_cache/
4
+ .venv/
5
+ __pycache__/
6
+ *.pyc
7
+ *.egg-info/
8
+ *.whl
9
+ .ruff_cache/
@@ -0,0 +1,7 @@
1
+ Copyright 2026 Free Press Unlimited
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,27 @@
1
+ Metadata-Version: 2.4
2
+ Name: fpu-barometer-admin
3
+ Version: 0.3.0
4
+ Summary: Admin, ETL, storage, and deployment tooling for Barometer
5
+ Project-URL: Homepage, https://www.freepressunlimited.org
6
+ Author: Phillip Kersten, Jannes Kelso, Jos Bartman
7
+ License-Expression: MIT
8
+ License-File: LICENSE.md
9
+ Keywords: admin,barometer,data-science,etl,fpu,journalists,press
10
+ Requires-Python: >=3.10
11
+ Requires-Dist: azure-functions>=1.18.0
12
+ Requires-Dist: azure-identity>=1.12.0
13
+ Requires-Dist: azure-storage-blob>=12.0.0
14
+ Requires-Dist: build>=1.0.0
15
+ Requires-Dist: click>=8.0.0
16
+ Requires-Dist: country-converter>=1.2
17
+ Requires-Dist: duckdb>=0.9.0
18
+ Requires-Dist: fpu-barometer>=0.3.0
19
+ Requires-Dist: pandas>=2.0.0
20
+ Requires-Dist: pyarrow>=10.0.0
21
+ Requires-Dist: pyreadr>=0.5.0
22
+ Requires-Dist: requests>=2.28.0
23
+ Description-Content-Type: text/markdown
24
+
25
+ # fpu-barometer-admin
26
+
27
+ Admin, ETL, storage, and deployment tooling for Barometer.
@@ -0,0 +1,3 @@
1
+ # fpu-barometer-admin
2
+
3
+ Admin, ETL, storage, and deployment tooling for Barometer.
@@ -0,0 +1,51 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "fpu-barometer-admin"
7
+ version = "0.3.0"
8
+ description = "Admin, ETL, storage, and deployment tooling for Barometer"
9
+ authors = [
10
+ {name = "Phillip Kersten"},
11
+ {name = "Jannes Kelso"},
12
+ {name = "Jos Bartman"},
13
+ ]
14
+ keywords = [
15
+ "press",
16
+ "journalists",
17
+ "fpu",
18
+ "barometer",
19
+ "admin",
20
+ "etl",
21
+ "data-science",
22
+ ]
23
+ readme = "README.md"
24
+ requires-python = ">=3.10"
25
+ license = "MIT"
26
+ dependencies = [
27
+ "fpu-barometer>=0.3.0",
28
+ "country-converter>=1.2",
29
+ "click>=8.0.0",
30
+ "duckdb>=0.9.0",
31
+ "pyarrow>=10.0.0",
32
+ "azure-storage-blob>=12.0.0",
33
+ "azure-identity>=1.12.0",
34
+ "azure-functions>=1.18.0",
35
+ "build>=1.0.0",
36
+ "requests>=2.28.0",
37
+ "pandas>=2.0.0",
38
+ "pyreadr>=0.5.0",
39
+ ]
40
+
41
+ [project.urls]
42
+ Homepage = "https://www.freepressunlimited.org"
43
+
44
+ [project.scripts]
45
+ fpu-barometer-admin = "fpu_barometer_admin.cli.commands:cli"
46
+
47
+ [tool.uv.sources]
48
+ fpu-barometer = { workspace = true }
49
+
50
+ [tool.hatch.build.targets.wheel]
51
+ packages = ["src/fpu_barometer_admin"]
@@ -0,0 +1,6 @@
1
+ """FPU Admin backend/admin implementation package."""
2
+
3
+ from .runners import DatasetRunner, DatasetRunResult
4
+
5
+ __version__ = "0.3.0"
6
+ __all__ = ["DatasetRunner", "DatasetRunResult"]
@@ -0,0 +1,5 @@
1
+ """Command-line interface for FPU admin pipeline."""
2
+
3
+ from .commands import cli
4
+
5
+ __all__ = ["cli"]
@@ -0,0 +1,199 @@
1
+ """CLI commands for the canonical FPU admin package."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from pathlib import Path
7
+
8
+ import click
9
+
10
+ from fpu_barometer_admin.cli.deploy import (
11
+ AzureCodeRedeployConfig,
12
+ AzureDeployConfig,
13
+ deploy_azure,
14
+ redeploy_azure_code,
15
+ )
16
+ from fpu_barometer_admin.runners import DatasetRunner
17
+ from fpu_barometer_admin.runners.seed import seed_gdelt_processed_artifact
18
+ from fpu_barometer_admin.storage import create_storage_runtime_from_env
19
+
20
+
21
@click.group()
@click.option("--data-path", default="./data", help="Path to data directory")
@click.pass_context
def cli(ctx: click.Context, data_path: str):
    """FPU Admin command line interface."""
    # NOTE: click renders the docstring above as the group's --help text, so
    # implementation notes live in comments instead.
    #
    # Subcommands read their settings from ctx.obj["runtime_env"]: a private
    # copy of the process environment in which FPU_DATA_PATH is pinned to the
    # --data-path option. os.environ itself is never mutated.
    ctx.ensure_object(dict)
    ctx.obj["runtime_env"] = {
        **os.environ,
        "FPU_DATA_PATH": str(Path(data_path)),
    }
30
+
31
+
32
@cli.command("deploy-azure")
@click.option("--resource-group", default="fpu-research-rg", show_default=True)
@click.option("--location", default="westeurope", show_default=True)
@click.option("--environment", default="prod", show_default=True)
@click.option("--storage-container", default="datasets", show_default=True)
@click.option(
    "--project-root",
    type=click.Path(file_okay=False, path_type=Path),
    default=None,
    help="Project root containing pyproject.toml. Auto-detected when omitted.",
)
@click.option("--python-version", default="3.10", show_default=True)
def deploy_azure_command(
    resource_group: str,
    location: str,
    environment: str,
    storage_container: str,
    project_root: Path | None,
    python_version: str,
):
    """Deploy Barometer's Azure Functions API and Blob storage."""
    # NOTE: the docstring above is what click shows in --help; keep it short.

    # Bundle the CLI options into the config object deploy_azure expects.
    config = AzureDeployConfig(
        resource_group=resource_group,
        location=location,
        environment=environment,
        storage_container=storage_container,
        project_root=project_root,
        python_version=python_version,
    )
    outcome = deploy_azure(config)

    # Report the deployment result as one key=value line per field, in a
    # fixed order.
    reported_fields = (
        "function_app",
        "function_app_url",
        "storage_account",
        "storage_container",
        "config_path",
    )
    for field_name in reported_fields:
        click.echo(f"{field_name}={getattr(outcome, field_name)}")
69
+
70
+
71
@cli.command("redeploy-azure-code")
@click.option("--resource-group", default=None, help="Existing Azure resource group.")
@click.option("--function-app", default=None, help="Existing Azure Function App name.")
@click.option(
    "--project-root",
    type=click.Path(file_okay=False, path_type=Path),
    default=None,
    help="Project root containing pyproject.toml. Auto-detected when omitted.",
)
@click.option("--scheduler-cron", default="0 15 2 * * *", show_default=True)
def redeploy_azure_code_command(
    resource_group: str | None,
    function_app: str | None,
    project_root: Path | None,
    scheduler_cron: str,
):
    """Redeploy code to an existing Azure Function App without touching storage."""
    # NOTE: the docstring above is what click shows in --help; keep it short.

    # Options left as None are passed through unchanged; resolving them is
    # redeploy_azure_code's responsibility.
    config = AzureCodeRedeployConfig(
        resource_group=resource_group,
        function_app=function_app,
        project_root=project_root,
        scheduler_cron=scheduler_cron,
    )
    outcome = redeploy_azure_code(config)

    # Report the redeploy result as one key=value line per field, in a
    # fixed order.
    reported_fields = (
        "function_app",
        "function_app_url",
        "resource_group",
        "scheduler_cron",
        "config_path",
    )
    for field_name in reported_fields:
        click.echo(f"{field_name}={getattr(outcome, field_name)}")
102
+
103
+
104
@cli.command("seed-processed-artifact")
@click.argument("dataset")
@click.option(
    "--processed-artifact",
    "processed_path",
    required=True,
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    help="Local canonical processed GDELT Event Parquet artifact to publish to Azure.",
)
@click.option(
    "--watermark-after",
    required=True,
    help="Explicit GDELT watermark after the seeded period, as YYYYmmddHHMMSS.",
)
@click.option(
    "--azure-storage-connection-string",
    envvar="FPU_AZURE_STORAGE_CONNECTION_STRING",
    required=True,
    help="Azure Storage connection string for the target Barometer storage account.",
)
@click.option(
    "--azure-storage-container",
    envvar="FPU_AZURE_STORAGE_CONTAINER",
    default="datasets",
    show_default=True,
    help="Azure Blob container containing Barometer dataset objects and catalog records.",
)
@click.pass_context
def seed_processed_artifact(
    ctx: click.Context,
    dataset: str,
    processed_path: Path,
    watermark_after: str,
    azure_storage_connection_string: str,
    azure_storage_container: str,
):
    """Publish a local processed replacement artifact to Azure and update the catalog."""
    # NOTE: the docstring above is what click shows in --help; keep it short.
    #
    # Raises click.BadParameter for an unsupported dataset, a malformed or
    # impossible watermark, or a non-.parquet artifact path. Exits with code 1
    # when the seed result status is "failed" and 3 when it is
    # "partial_success".

    # Imported locally so this command does not depend on a module-level
    # import being present.
    from datetime import datetime

    if dataset != "gdelt":
        raise click.BadParameter("only 'gdelt' is supported", param_hint="dataset")

    # Shape check first: exactly 14 ASCII digits. str.isdigit() alone also
    # accepts non-ASCII Unicode digits, hence the isascii() guard.
    watermark_ok = (
        len(watermark_after) == 14
        and watermark_after.isascii()
        and watermark_after.isdigit()
    )
    if watermark_ok:
        # Then make sure the digits form a real calendar timestamp, so values
        # such as month 13 or "99999999999999" are rejected here instead of
        # being written out as a watermark.
        try:
            datetime.strptime(watermark_after, "%Y%m%d%H%M%S")
        except ValueError:
            watermark_ok = False
    if not watermark_ok:
        raise click.BadParameter(
            "must be in YYYYmmddHHMMSS format", param_hint="--watermark-after"
        )

    if processed_path.suffix.lower() != ".parquet":
        raise click.BadParameter(
            "must point to a local processed .parquet file",
            param_hint="--processed-artifact",
        )

    # Work on a copy of the group's runtime environment and force the Azure
    # backend with the connection details supplied to this command.
    runtime_env = dict(ctx.obj["runtime_env"])
    runtime_env["FPU_STORAGE_BACKEND"] = "azure"
    runtime_env["FPU_AZURE_STORAGE_CONNECTION_STRING"] = azure_storage_connection_string
    runtime_env["FPU_AZURE_STORAGE_CONTAINER"] = azure_storage_container
    runtime = create_storage_runtime_from_env(runtime_env)

    result = seed_gdelt_processed_artifact(
        object_storage=runtime.object_storage,
        catalog=runtime.create_catalog(),
        processed_path=processed_path,
        watermark_after=watermark_after,
        reader_provider=runtime.get_reader,
    )

    # Always print the result fields before signalling failure through the
    # exit code.
    for key, value in result.key_values():
        click.echo(f"{key}={value}")
    if result.status == "failed":
        raise click.exceptions.Exit(1)
    if result.status == "partial_success":
        raise click.exceptions.Exit(3)
172
+
173
+
174
def _run_static_dataset(ctx: click.Context, dataset: str) -> None:
    """Run one dataset through a DatasetRunner and echo the outcome.

    Builds the storage runtime from the group's ``runtime_env``, runs
    ``dataset`` inside the runner's context manager, then prints the
    result's status, run id, version id and comma-joined stages as
    ``key=value`` lines.
    """
    runtime = create_storage_runtime_from_env(ctx.obj["runtime_env"])
    with DatasetRunner.from_runtime(runtime) as runner:
        result = runner.run_dataset(dataset)

    click.echo(f"status={result.status}")
    click.echo(f"run_id={result.run_id}")
    click.echo(f"version_id={result.version_id}")
    click.echo(f"stages={','.join(result.stages)}")


@cli.command("run-acled-static")
@click.pass_context
def run_acled_static(ctx: click.Context):
    """Run the static ACLED Dataset Runner tracer."""
    # NOTE: the docstring above is what click shows in --help; keep it short.
    _run_static_dataset(ctx, "acled")


@cli.command("run-rr-static")
@click.pass_context
def run_rr_static(ctx: click.Context):
    """Run the static RR Dataset Runner tracer."""
    # NOTE: the docstring above is what click shows in --help; keep it short.
    _run_static_dataset(ctx, "rr")