mounts-project 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ """Unofficial Python client for the MOUNTS project.
2
+
3
+ Scrapes SO2 and thermal timeseries from http://www.mounts-project.com and
4
+ exposes them as pandas DataFrames. See :class:`mounts_project.core.MountsProject`
5
+ for the main entry point.
6
+ """
7
+
8
+ from importlib.metadata import version
9
+
10
+ from mounts_project.core import MountsProject
11
+
12
+
13
# Package metadata. ``__version__`` is looked up from the installed
# distribution's metadata via ``importlib.metadata.version`` (which raises
# ``PackageNotFoundError`` when the distribution is not installed).
__version__ = version("mounts-project")
__author__ = "Martanto"
__author_email__ = "martanto@live.com"
__license__ = "MIT"
__copyright__ = "Copyright (c) 2026, Martanto"
__url__ = "https://github.com/martanto/mounts-project"

# Public API: every metadata dunder defined above plus the main entry-point
# class. ``__url__`` is listed so that it is exported consistently with the
# other metadata attributes.
__all__ = [
    "__version__",
    "__author__",
    "__author_email__",
    "__license__",
    "__copyright__",
    "__url__",
    "MountsProject",
]
mounts_project/cli.py ADDED
@@ -0,0 +1,87 @@
1
+ """CLI entry point for the mounts-project package.
2
+
3
+ Exposes two subcommands:
4
+
5
+ mounts save --type csv # extract + save the default catalog
6
+ mounts dashboard # launch the Streamlit dashboard
7
+
8
+ Registered via ``[project.scripts] mounts = "mounts_project.cli:cli"`` in
9
+ ``pyproject.toml``.
10
+ """
11
+
12
+ import sys
13
+ import subprocess
14
+ from typing import Literal, cast
15
+ from importlib.resources import files
16
+
17
+ from mounts_project import MountsProject
18
+
19
+ import click
20
+
21
+
22
# Root command group. It performs no work itself: the subcommands defined in
# this module attach to it through ``@cli.command()``. ``--version`` reports
# the version of the ``mounts-project`` distribution.
# NOTE: the docstring below is shown by click as the group's help text.
@click.group()
@click.version_option(package_name="mounts-project")
def cli() -> None:
    """Command-line interface for the MOUNTS scraper and dashboard."""
26
+
27
+
28
@cli.command()
@click.option(
    "--type",
    "filetype",
    default="csv",
    show_default=True,
    type=click.Choice(["csv", "xlsx"], case_sensitive=False),
    help="Output file format.",
)
@click.option(
    "--output-dir",
    default=None,
    type=click.Path(file_okay=False),
    help="Override output directory (default: ./output).",
)
@click.option(
    "--overwrite/--no-overwrite",
    default=False,
    show_default=True,
    help="Re-fetch from MOUNTS even when cached JSON exists.",
)
@click.option(
    "--verbose",
    "-v",
    is_flag=True,
    help="Emit per-volcano info logs during extraction.",
)
def save(
    filetype: str,
    output_dir: str | None,
    overwrite: bool,
    verbose: bool,
) -> None:
    """Extract every volcano in the default catalog and save to CSV/XLSX."""
    # NOTE: the docstring above doubles as the ``mounts save --help`` text.
    project = MountsProject(
        output_dir=output_dir,
        overwrite=overwrite,
        verbose=verbose,
    )
    extracted = project.extract()

    # ``--type`` is matched case-insensitively, so normalise it before
    # narrowing the plain ``str`` to the literal type ``save`` is annotated with.
    export_format = cast(Literal["csv", "xlsx"], filetype.lower())
    extracted.save(filetype=export_format)
67
+
68
+
69
@cli.command(context_settings={"ignore_unknown_options": True})
@click.argument("streamlit_args", nargs=-1, type=click.UNPROCESSED)
def dashboard(streamlit_args: tuple[str, ...]) -> None:
    """Launch the Streamlit dashboard.

    Extra arguments are forwarded to ``streamlit run``, e.g.

        mounts dashboard --server.port 9000
    """
    # NOTE: the docstring above doubles as the ``mounts dashboard --help`` text.
    # Locate the dashboard script that ships inside the package.
    script = files("mounts_project").joinpath("dashboard.py")

    # Run Streamlit with the current interpreter (``python -m streamlit``)
    # and append whatever extra arguments the user supplied, untouched.
    launcher = [sys.executable, "-m", "streamlit", "run", str(script)]
    launcher.extend(streamlit_args)

    # Exit with the same status code the Streamlit process returned.
    exit_status = subprocess.call(launcher)
    raise SystemExit(exit_status)
@@ -0,0 +1,74 @@
1
+ """Package constants.
2
+
3
+ Holds the MOUNTS base URLs, the default Indonesian volcano catalog used by
4
+ :meth:`mounts_project.core.MountsProject.extract` when no ``volcanoes`` argument is
5
+ provided, and the path / unit / color constants consumed by the Streamlit
6
+ dashboard.
7
+ """
8
+
9
+ import os
10
+
11
+
12
# Base address of the MOUNTS website and of its per-volcano timeseries pages.
_MOUNTS_HOME_URL = "http://mounts-project.com"
_MOUNTS_TIMESERIES_URL = f"{_MOUNTS_HOME_URL}/timeseries"

# Default export locations. They are resolved against the working directory
# at the moment this module is imported.
OUTPUT_DIR = os.path.join(os.getcwd(), "output")
CSV_PATH = os.path.join(OUTPUT_DIR, "all-volcanoes.csv")
XLSX_PATH = os.path.join(OUTPUT_DIR, "all-volcanoes.xlsx")

# Unit labels for the two measurement series.
SO2_UNIT = "tons/day"
THERMAL_UNIT = "km²"

# Colours associated with the two measurement series.
SO2_COLOR = "orange"
THERMAL_COLOR = "red"

# Default catalog: one ``{"name": ..., "code": ...}`` entry per volcano,
# built from (name, code) pairs to keep the table compact. Order matters
# because it is the order in which the volcanoes are processed.
_VOLCANOES: list[dict[str, str]] = [
    {"name": volcano_name, "code": volcano_code}
    for volcano_name, volcano_code in (
        ("Lewotobi Laki-laki", "264180"),
        ("Marapi", "261140"),
        ("Anak Krakatau", "262000"),
        ("Kerinci", "261170"),
        ("Karangetang", "267020"),
        ("Dukono", "268010"),
        ("Ili Lewotolok", "264230"),
        ("Ibu", "268030"),
        ("Semeru", "263300"),
        ("Raung", "263340"),
        ("Ijen", "263350"),
        ("Slamet", "263180"),
    )
]
mounts_project/core.py ADDED
@@ -0,0 +1,278 @@
1
+ """Top-level orchestrator for the MOUNTS scrape-and-export pipeline.
2
+
3
+ Defines :class:`MountsProject`, whose intended call chain is
4
+ ``MountsProject(...).extract().save(filetype=...)``. Network responses are
5
+ cached as JSON under ``<output_dir>/json/`` so subsequent runs work offline.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ from typing import Any, Self, Literal
11
+
12
+ from mounts_project.utils import (
13
+ slugify,
14
+ ensure_dir,
15
+ get_so2_values,
16
+ get_thermal_values,
17
+ get_json_from_javascript,
18
+ )
19
+ from mounts_project.logger import logger
20
+ from mounts_project.constants import _VOLCANOES, _MOUNTS_TIMESERIES_URL
21
+
22
+ import pandas as pd
23
+ import requests
24
+
25
+
26
+ class MountsProject:
27
+ """Orchestrator for scraping and exporting MOUNTS timeseries data.
28
+
29
+ Holds runtime state (extracted DataFrames, per-volcano catalogs, written
30
+ file paths) and exposes the standard pipeline ``MountsProject(...).extract()
31
+ .save(filetype=...)``. Network responses are cached as JSON under
32
+ ``<output_dir>/json/`` so subsequent runs work offline unless
33
+ ``overwrite=True``.
34
+
35
+ Attributes:
36
+ filter_values (float | None): Lower-bound filter applied to ``value``.
37
+ Rows with ``value <= filter_values`` are dropped. ``None`` disables
38
+ filtering.
39
+ output_dir (str): Root directory for cached JSON and exported files.
40
+ overwrite (bool): If ``True``, re-fetch from MOUNTS even when a cached
41
+ JSON file exists.
42
+ verbose (bool): If ``True``, emit per-volcano info logs during fetch.
43
+ data (dict[str, pd.DataFrame]): Per-volcano extracted DataFrames keyed
44
+ by volcano name. Populated by :meth:`extract`.
45
+ catalogs (list[dict[str, Any]]): Per-volcano metadata (``name``,
46
+ ``code``, ``updated_at``). Populated by :meth:`extract`.
47
+ files (list[str]): Paths of files written by :meth:`save`.
48
+ """
49
+
50
+ def __init__(
51
+ self,
52
+ filter_values: float | None = 0.1,
53
+ output_dir: str | None = None,
54
+ overwrite: bool = False,
55
+ verbose: bool = False,
56
+ ):
57
+ """Initialise a :class:`MountsProject` instance.
58
+
59
+ Args:
60
+ filter_values (float | None, optional): Lower bound applied to the
61
+ ``value`` column after extraction. Pass ``None`` to disable.
62
+ Defaults to ``0.1``.
63
+ output_dir (str | None, optional): Root directory for cached JSON
64
+ and exported CSV/XLSX files. Defaults to ``<cwd>/output``.
65
+ overwrite (bool, optional): Force re-fetching from MOUNTS even when
66
+ a cached JSON file exists. Defaults to ``False``.
67
+ verbose (bool, optional): Emit per-volcano info logs during fetch.
68
+ Defaults to ``False``.
69
+ """
70
+ output_dir = (
71
+ output_dir
72
+ if output_dir is not None
73
+ else os.path.join(os.getcwd(), "output")
74
+ )
75
+
76
+ self.filter_values = filter_values
77
+ self.output_dir = output_dir
78
+ self.overwrite = overwrite
79
+ self.verbose = verbose
80
+
81
+ self.data: dict[str, pd.DataFrame] = {}
82
+ self.catalogs: list[dict[str, Any]] = []
83
+ self.files: list[str] = []
84
+
85
+ def extract_single_volcano(
86
+ self,
87
+ name: str,
88
+ code: str,
89
+ ) -> pd.DataFrame:
90
+ """Fetch and assemble the combined SO2 + thermal DataFrame for one volcano.
91
+
92
+ Calls :meth:`_get_json` (which handles the disk cache), then merges the
93
+ SO2 and thermal series, parses datetimes, adds ``date``/``time``/
94
+ ``code``/``name`` columns, sets ``datetime`` as the index, and applies
95
+ the ``filter_values`` lower bound when set.
96
+
97
+ Args:
98
+ name (str): Volcano name (used for the ``name`` column and cache
99
+ filename).
100
+ code (str): MOUNTS volcano code (used in the URL and the ``code``
101
+ column).
102
+
103
+ Returns:
104
+ pd.DataFrame: Combined SO2 and thermal observations indexed by
105
+ ``datetime``, with columns ``value``, ``graph``, ``type``, ``date``,
106
+ ``time``, ``code``, and ``name``.
107
+ """
108
+ graph_json = self._get_json(name, code)
109
+
110
+ so2 = get_so2_values(graph_json)
111
+ thermal = get_thermal_values(graph_json)
112
+
113
+ df = pd.concat([so2, thermal])
114
+
115
+ df["datetime"] = pd.to_datetime(df["datetime"])
116
+ df["date"] = df["datetime"].apply(lambda x: x.strftime("%Y-%m-%d"))
117
+ df["time"] = df["datetime"].apply(lambda x: x.strftime("%H:%M:%S"))
118
+ df["code"] = code
119
+ df["name"] = name
120
+ df = df.set_index("datetime")
121
+
122
+ if self.filter_values is not None:
123
+ df = df[df["value"] > self.filter_values]
124
+
125
+ return df
126
+
127
+ def extract(self, volcanoes: list[dict[str, str]] | None = None) -> Self:
128
+ """Extract timeseries for a list of volcanoes and populate ``self.data``.
129
+
130
+ Iterates over the given volcanoes (or the built-in :data:`_VOLCANOES`
131
+ catalog when ``None``) and calls :meth:`extract_single_volcano` for
132
+ each. Also builds ``self.catalogs`` with the last observation timestamp
133
+ per volcano.
134
+
135
+ Args:
136
+ volcanoes (list[dict[str, str]] | None, optional): Volcanoes to
137
+ extract. Each entry must have ``name`` and ``code`` keys. When
138
+ ``None``, the built-in 12-volcano Indonesian catalog is used.
139
+ Defaults to ``None``.
140
+
141
+ Returns:
142
+ Self: This :class:`MountsProject` instance, to enable chaining with
143
+ :meth:`save`.
144
+ """
145
+ volcanoes = volcanoes if volcanoes is not None else _VOLCANOES
146
+
147
+ self.data = {}
148
+ self.catalogs = []
149
+ for volcano in volcanoes:
150
+ try:
151
+ df = self.extract_single_volcano(volcano["name"], volcano["code"])
152
+ except Exception as e:
153
+ logger.error(f"[{volcano['name']}] extract failed: {e}")
154
+ continue
155
+ self.data[volcano["name"]] = df
156
+ self.catalogs.append(
157
+ {
158
+ "name": volcano["name"],
159
+ "code": volcano["code"],
160
+ "updated_at": df.index.max(),
161
+ }
162
+ )
163
+
164
+ return self
165
+
166
+ def save(
167
+ self, filetype: Literal["csv", "xlsx"] = "csv", merge: bool = True
168
+ ) -> Self:
169
+ """Write per-volcano files plus a merged ``all-volcanoes`` export.
170
+
171
+ Writes each DataFrame in ``self.data`` to
172
+ ``<output_dir>/<filetype>/<slug>.<filetype>`` and a concatenated file to
173
+ ``<output_dir>/all-volcanoes.<filetype>``. Calls :meth:`extract`
174
+ automatically when ``self.data`` is empty.
175
+
176
+ Args:
177
+ filetype (Literal["csv", "xlsx"], optional): Output format. Defaults
178
+ to ``"csv"``.
179
+ merge (bool, optional): Reserved for future use; currently the
180
+ merged file is always written. Defaults to ``True``.
181
+
182
+ Returns:
183
+ Self: This :class:`MountsProject` instance, to enable chaining.
184
+ """
185
+ save_dir = "csv" if filetype == "csv" else "xlsx"
186
+ save_dir = os.path.join(self.output_dir, save_dir)
187
+ ensure_dir(save_dir)
188
+
189
+ if len(self.data) == 0:
190
+ self.extract()
191
+
192
+ files: list[str] = []
193
+
194
+ dfs = []
195
+ for volcano_name, df in self.data.items():
196
+ filename = slugify(volcano_name)
197
+ filepath = os.path.join(save_dir, f"{filename}.{filetype}")
198
+
199
+ if filetype == "csv":
200
+ df.to_csv(filepath, index=True)
201
+ else:
202
+ df.to_excel(filepath, index=True)
203
+
204
+ dfs.append(df)
205
+
206
+ logger.info(f"[{volcano_name}] Saved to: {filepath}")
207
+ files.append(filepath)
208
+
209
+ df_concat = pd.concat(dfs, ignore_index=False)
210
+
211
+ if filetype == "csv":
212
+ df_concat.to_csv(
213
+ os.path.join(self.output_dir, "all-volcanoes.csv"), index=True
214
+ )
215
+ else:
216
+ df_concat.to_excel(
217
+ os.path.join(self.output_dir, "all-volcanoes.xlsx"), index=True
218
+ )
219
+
220
+ self.files = files
221
+
222
+ return self
223
+
224
+ def _get_json(
225
+ self,
226
+ name: str,
227
+ code: str,
228
+ ):
229
+ """Return the parsed MOUNTS graph JSON for one volcano (cached on disk).
230
+
231
+ Acts as the network/cache boundary: when a cached file exists at
232
+ ``<output_dir>/json/<slug>.json`` and ``self.overwrite`` is ``False``,
233
+ it is read from disk. Otherwise the MOUNTS timeseries page is fetched,
234
+ the embedded ``var graph = {...}`` blob is extracted via
235
+ :func:`get_json_from_javascript`, and the result is written to the
236
+ cache before being returned.
237
+
238
+ Args:
239
+ name (str): Volcano name (used to build the cache filename).
240
+ code (str): MOUNTS volcano code (used to build the request URL).
241
+
242
+ Returns:
243
+ dict: Parsed MOUNTS graph object containing Plotly traces under
244
+ ``data``.
245
+
246
+ Raises:
247
+ requests.exceptions.RequestException: If the HTTP request to MOUNTS
248
+ fails.
249
+ """
250
+ url = _MOUNTS_TIMESERIES_URL + "/" + str(code)
251
+
252
+ try:
253
+ json_dir = os.path.join(self.output_dir, "json")
254
+ ensure_dir(json_dir)
255
+
256
+ filename = slugify(f"{name}-{code}")
257
+ json_filepath = os.path.join(json_dir, f"{filename}.json")
258
+
259
+ if not self.overwrite and os.path.exists(json_filepath):
260
+ if self.verbose:
261
+ logger.info(f"File {json_filepath} already exists, skipping")
262
+ graph_json: dict = json.load(open(json_filepath))
263
+ return graph_json
264
+
265
+ if self.verbose:
266
+ logger.info(f"Extracting {name} ... ")
267
+
268
+ response = requests.get(url)
269
+ graph_json = get_json_from_javascript(response)
270
+
271
+ with open(json_filepath, "w") as write_file:
272
+ json.dump(graph_json, write_file, indent=2)
273
+
274
+ return graph_json
275
+
276
+ except requests.exceptions.RequestException as e:
277
+ logger.error(f"Error getting {name}: {e}")
278
+ raise e