data-hub-watcher 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data_hub_watcher-0.1.0/LICENSE +21 -0
  2. data_hub_watcher-0.1.0/PKG-INFO +64 -0
  3. data_hub_watcher-0.1.0/README.md +29 -0
  4. data_hub_watcher-0.1.0/pyproject.toml +67 -0
  5. data_hub_watcher-0.1.0/setup.cfg +4 -0
  6. data_hub_watcher-0.1.0/src/data_hub_watcher/__init__.py +0 -0
  7. data_hub_watcher-0.1.0/src/data_hub_watcher/api_client.py +237 -0
  8. data_hub_watcher-0.1.0/src/data_hub_watcher/cli.py +1076 -0
  9. data_hub_watcher-0.1.0/src/data_hub_watcher/config_io.py +80 -0
  10. data_hub_watcher-0.1.0/src/data_hub_watcher/constants.py +164 -0
  11. data_hub_watcher-0.1.0/src/data_hub_watcher/events.py +77 -0
  12. data_hub_watcher-0.1.0/src/data_hub_watcher/heartbeat.py +123 -0
  13. data_hub_watcher-0.1.0/src/data_hub_watcher/models.py +254 -0
  14. data_hub_watcher-0.1.0/src/data_hub_watcher/monitor.py +306 -0
  15. data_hub_watcher-0.1.0/src/data_hub_watcher/run_detector.py +332 -0
  16. data_hub_watcher-0.1.0/src/data_hub_watcher/runtime.py +292 -0
  17. data_hub_watcher-0.1.0/src/data_hub_watcher/self_update.py +274 -0
  18. data_hub_watcher-0.1.0/src/data_hub_watcher/service.py +463 -0
  19. data_hub_watcher-0.1.0/src/data_hub_watcher/state.py +335 -0
  20. data_hub_watcher-0.1.0/src/data_hub_watcher/updater.py +602 -0
  21. data_hub_watcher-0.1.0/src/data_hub_watcher/uploader.py +299 -0
  22. data_hub_watcher-0.1.0/src/data_hub_watcher/util.py +25 -0
  23. data_hub_watcher-0.1.0/src/data_hub_watcher.egg-info/PKG-INFO +64 -0
  24. data_hub_watcher-0.1.0/src/data_hub_watcher.egg-info/SOURCES.txt +36 -0
  25. data_hub_watcher-0.1.0/src/data_hub_watcher.egg-info/dependency_links.txt +1 -0
  26. data_hub_watcher-0.1.0/src/data_hub_watcher.egg-info/entry_points.txt +2 -0
  27. data_hub_watcher-0.1.0/src/data_hub_watcher.egg-info/requires.txt +9 -0
  28. data_hub_watcher-0.1.0/src/data_hub_watcher.egg-info/top_level.txt +1 -0
  29. data_hub_watcher-0.1.0/tests/test_monitor_initial_scan.py +395 -0
  30. data_hub_watcher-0.1.0/tests/test_preview_environment.py +159 -0
  31. data_hub_watcher-0.1.0/tests/test_run_detection_config.py +42 -0
  32. data_hub_watcher-0.1.0/tests/test_run_detector.py +241 -0
  33. data_hub_watcher-0.1.0/tests/test_run_detector_hydration.py +259 -0
  34. data_hub_watcher-0.1.0/tests/test_runtime.py +294 -0
  35. data_hub_watcher-0.1.0/tests/test_self_update.py +318 -0
  36. data_hub_watcher-0.1.0/tests/test_service.py +709 -0
  37. data_hub_watcher-0.1.0/tests/test_updater.py +696 -0
  38. data_hub_watcher-0.1.0/tests/test_uploader.py +193 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Arcadia Science
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,64 @@
1
+ Metadata-Version: 2.4
2
+ Name: data-hub-watcher
3
+ Version: 0.1.0
4
+ Summary: File-watcher agent for lab instrument PCs that ingests data into the Arcadia Science Data Hub.
5
+ Author-email: Arcadia Science <engineering@arcadiascience.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/Arcadia-Science/data-hub
8
+ Project-URL: Documentation, https://github.com/Arcadia-Science/data-hub/blob/production/docs/guides/installing-a-watcher.md
9
+ Project-URL: Repository, https://github.com/Arcadia-Science/data-hub
10
+ Project-URL: Issues, https://github.com/Arcadia-Science/data-hub/issues
11
+ Keywords: arcadia,data-hub,lab,instruments,watcher,uploader
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Operating System :: Microsoft :: Windows
16
+ Classifier: Operating System :: POSIX :: Linux
17
+ Classifier: Operating System :: MacOS
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering
21
+ Classifier: Topic :: System :: Filesystems
22
+ Classifier: Topic :: System :: Monitoring
23
+ Requires-Python: >=3.12
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: click>=8.2.1
27
+ Requires-Dist: packaging>=24.0
28
+ Requires-Dist: pydantic>=2.11.9
29
+ Requires-Dist: pyyaml>=6.0
30
+ Requires-Dist: python-dotenv>=1.0
31
+ Requires-Dist: watchdog>=4.0.0
32
+ Provides-Extra: windows-service
33
+ Requires-Dist: pywin32; extra == "windows-service"
34
+ Dynamic: license-file
35
+
36
+ # data-hub-watcher
37
+
38
+ A file-watcher agent that runs on lab instrument PCs and uploads new files to the [Arcadia Science Data Hub](https://github.com/Arcadia-Science/data-hub). It groups files into runs, retries uploads, sends heartbeats, and can optionally run as a Windows service.
39
+
40
+ ## Install
41
+
42
+ ```sh
43
+ uv tool install data-hub-watcher
44
+ ```
45
+
46
+ The CLI is published as the `data-hub-watcher` script. After installing, walk through the interactive setup wizard:
47
+
48
+ ```sh
49
+ data-hub-watcher init
50
+ ```
51
+
52
+ ## Usage
53
+
54
+ ```sh
55
+ data-hub-watcher watch # start watching for files
56
+ data-hub-watcher self-update # check for and apply package updates
57
+ data-hub-watcher service install # Windows: install as a service
58
+ ```
59
+
60
+ See [the operator guide](https://github.com/Arcadia-Science/data-hub/blob/main/docs/guides/installing-a-watcher.md) for the full setup walk-through, configuration reference, and troubleshooting.
61
+
62
+ ## License
63
+
64
+ MIT — see the `LICENSE` file bundled with the wheel.
@@ -0,0 +1,29 @@
1
+ # data-hub-watcher
2
+
3
+ A file-watcher agent that runs on lab instrument PCs and uploads new files to the [Arcadia Science Data Hub](https://github.com/Arcadia-Science/data-hub). It groups files into runs, retries uploads, sends heartbeats, and can optionally run as a Windows service.
4
+
5
+ ## Install
6
+
7
+ ```sh
8
+ uv tool install data-hub-watcher
9
+ ```
10
+
11
+ The CLI is published as the `data-hub-watcher` script. After installing, walk through the interactive setup wizard:
12
+
13
+ ```sh
14
+ data-hub-watcher init
15
+ ```
16
+
17
+ ## Usage
18
+
19
+ ```sh
20
+ data-hub-watcher watch # start watching for files
21
+ data-hub-watcher self-update # check for and apply package updates
22
+ data-hub-watcher service install # Windows: install as a service
23
+ ```
24
+
25
+ See [the operator guide](https://github.com/Arcadia-Science/data-hub/blob/production/docs/guides/installing-a-watcher.md) for the full setup walk-through, configuration reference, and troubleshooting.
26
+
27
+ ## License
28
+
29
+ MIT — see the `LICENSE` file bundled with the wheel.
@@ -0,0 +1,67 @@
1
+ [project]
2
+ name = "data-hub-watcher"
3
+ version = "0.1.0"
4
+ description = "File-watcher agent for lab instrument PCs that ingests data into the Arcadia Science Data Hub."
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ license = "MIT"
8
+ license-files = ["LICENSE"]
9
+ authors = [
10
+ { name = "Arcadia Science", email = "engineering@arcadiascience.com" },
11
+ ]
12
+ keywords = ["arcadia", "data-hub", "lab", "instruments", "watcher", "uploader"]
13
+ classifiers = [
14
+ "Development Status :: 4 - Beta",
15
+ "Environment :: Console",
16
+ "Intended Audience :: Science/Research",
17
+ "Operating System :: Microsoft :: Windows",
18
+ "Operating System :: POSIX :: Linux",
19
+ "Operating System :: MacOS",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Scientific/Engineering",
23
+ "Topic :: System :: Filesystems",
24
+ "Topic :: System :: Monitoring",
25
+ ]
26
+ dependencies = [
27
+ "click>=8.2.1",
28
+ "packaging>=24.0",
29
+ "pydantic>=2.11.9",
30
+ "pyyaml>=6.0",
31
+ "python-dotenv>=1.0",
32
+ "watchdog>=4.0.0",
33
+ ]
34
+
35
+ [project.optional-dependencies]
36
+ windows-service = ["pywin32"]
37
+
38
+ [project.urls]
39
+ Homepage = "https://github.com/Arcadia-Science/data-hub"
40
+ # Operator-facing install + auto-update guide — what someone landing on
41
+ # the PyPI page actually needs. The developer-facing `docs/watcher.md`
42
+ # describes the editable-checkout workflow and is reachable from the
43
+ # repository link below.
44
+ Documentation = "https://github.com/Arcadia-Science/data-hub/blob/production/docs/guides/installing-a-watcher.md"
45
+ Repository = "https://github.com/Arcadia-Science/data-hub"
46
+ Issues = "https://github.com/Arcadia-Science/data-hub/issues"
47
+
48
+ # Test-only deps live in a PEP 735 group instead of `[project].dependencies`
49
+ # so the published wheel does not advertise `data-hub-shared` as a runtime
50
+ # requirement — the integration suite imports it (see `watcher/tests/integration/*`)
51
+ # but no runtime watcher code does, and the package is not published to PyPI.
52
+ # `uv sync` includes this group by default for local development.
53
+ [dependency-groups]
54
+ test = ["data-hub-shared"]
55
+
56
+ [tool.uv.sources]
57
+ data-hub-shared = { workspace = true }
58
+
59
+ [project.scripts]
60
+ data-hub-watcher = "data_hub_watcher.cli:cli"
61
+
62
+ [build-system]
63
+ requires = ["setuptools>=77.0", "wheel"]
64
+ build-backend = "setuptools.build_meta"
65
+
66
+ [tool.setuptools.packages.find]
67
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,237 @@
1
+ from __future__ import annotations
2
+ import logging
3
+ import os
4
+ from datetime import datetime, timezone
5
+ from typing import Any
6
+
7
+ import requests
8
+
9
+ from data_hub_watcher.models import (
10
+ ApiErrorDetail,
11
+ ConfigChecksumResponse,
12
+ EventsResponse,
13
+ FileResponse,
14
+ HeartbeatResponse,
15
+ InstrumentDetailResponse,
16
+ InstrumentResponse,
17
+ PresignedUploadResponse,
18
+ RegisterWatcherResponse,
19
+ RunDetailResponse,
20
+ RunResponse,
21
+ UploadQueueResponse,
22
+ WatcherUpdateInfoResponse,
23
+ )
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class ApiError(Exception):
    """Error raised for a failed Data Hub API call.

    Besides the human-readable message, an instance carries:

    * ``status_code`` -- the HTTP status of the failed response. It keeps its
      default of ``0`` when no status is supplied (this module raises the
      error that way for connection errors and timeouts).
    * ``detail`` -- the structured error parsed from the response body, or
      ``None`` when the body did not contain one.
    """

    def __init__(
        self,
        message: str,
        status_code: int = 0,
        detail: ApiErrorDetail | None = None,
    ) -> None:
        # Record the extra context on the instance, then hand the message to
        # ``Exception`` so that ``str(exc)`` and ``exc.args`` expose it.
        self.detail = detail
        self.status_code = status_code
        self.message = message
        super().__init__(message)
41
+
42
+
43
# Default ``(connect, read)`` timeouts, in seconds, applied to every request.
DEFAULT_TIMEOUT: tuple[float, float] = (5, 30)


class DataHubClient:
    """HTTP client for the Data Hub API.

    Every call goes through a single `requests.Session`. Failures while
    sending a request, and any non-2xx response, are raised as `ApiError`.
    Successful JSON bodies are validated into the response models imported
    from `data_hub_watcher.models`.
    """

    def __init__(
        self,
        base_url: str,
        api_key: str | None = None,
        timeout: tuple[float, float] = DEFAULT_TIMEOUT,
    ) -> None:
        """Create a client bound to *base_url*.

        Args:
            base_url: Root URL of the API; trailing slashes are stripped.
            api_key: Bearer token. When falsy, the ``DATA_HUB_API_KEY``
                environment variable is used instead; if that is also empty
                no ``Authorization`` header is set.
            timeout: ``(connect, read)`` timeouts in seconds.
        """
        self.base_url = base_url.rstrip("/")
        self._timeout = timeout
        # A persistent session reuses TCP connections across requests, which
        # matters when the watcher is long-running and chatting with the API
        # every heartbeat interval.
        self._session = requests.Session()

        # Allow the API key to be passed explicitly (e.g. during `init`) or
        # fall back to the environment variable for normal operation.
        key = api_key or os.environ.get("DATA_HUB_API_KEY", "")
        if key:
            self._session.headers["Authorization"] = f"Bearer {key}"

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _url(self, path: str) -> str:
        """Join *path* onto the base URL; *path* must carry its leading slash."""
        return f"{self.base_url}{path}"

    def _handle_error(self, resp: requests.Response) -> None:
        """Parse an error body and raise `ApiError`.

        This method always raises. If the JSON body has an ``"error"`` entry
        that validates as `ApiErrorDetail`, its message becomes the exception
        message; otherwise the raw response text is used.
        """
        detail: ApiErrorDetail | None = None
        try:
            body = resp.json()
            if "error" in body:
                detail = ApiErrorDetail.model_validate(body["error"])
                msg = detail.message
            else:
                msg = resp.text
        except Exception:
            # Deliberately broad: the body may not be JSON, may not be a
            # mapping, or may fail model validation. Error reporting must not
            # itself fail, so fall back to the raw text.
            msg = resp.text
        raise ApiError(msg, status_code=resp.status_code, detail=detail)

    def _request(
        self,
        method: str,
        path: str,
        *,
        json: dict[str, Any] | None = None,
        params: dict[str, Any] | None = None,
    ) -> requests.Response:
        """Send one request and return the response if it is 2xx.

        Args:
            method: HTTP verb.
            path: API path beginning with ``/``.
            json: Optional JSON body.
            params: Optional query parameters.

        Raises:
            ApiError: On a connection error, a timeout, any other
                `requests`-level failure, or a non-2xx response.
        """
        try:
            resp = self._session.request(
                method, self._url(path), json=json, params=params, timeout=self._timeout
            )
        except requests.ConnectionError as exc:
            raise ApiError(f"Connection error: {exc}") from exc
        except requests.Timeout as exc:
            raise ApiError(f"Request timed out: {exc}") from exc
        except requests.RequestException as exc:
            # Remaining transport-level failures (too many redirects, invalid
            # URL, ...) are wrapped too, so callers only need to catch ApiError.
            raise ApiError(f"Request failed: {exc}") from exc

        if not resp.ok:
            self._handle_error(resp)
        return resp

    @staticmethod
    def _build_upload_payload(
        filename: str,
        content_type: str | None,
        size_bytes: int | None,
        file_created_at_ts: float | None,
    ) -> dict[str, Any]:
        """Assemble the JSON body for a request-upload-url call.

        Optional fields are omitted when not provided. The creation timestamp
        is converted from POSIX seconds to a UTC ISO 8601 string.
        """
        payload: dict[str, Any] = {"filename": filename}
        if content_type:
            payload["content_type"] = content_type
        if size_bytes is not None:
            payload["size_bytes"] = size_bytes
        # Compare against None rather than truthiness so that a 0.0 timestamp
        # (the epoch) is still sent, mirroring how size_bytes treats 0.
        if file_created_at_ts is not None:
            payload["file_created_at"] = datetime.fromtimestamp(
                file_created_at_ts, tz=timezone.utc
            ).isoformat()
        return payload

    # ------------------------------------------------------------------
    # Instruments
    # ------------------------------------------------------------------

    def list_instruments(self) -> list[InstrumentResponse]:
        """GET ``/instruments`` and validate each item of the returned list."""
        resp = self._request("GET", "/instruments")
        return [InstrumentResponse.model_validate(item) for item in resp.json()]

    def create_instrument(self, id: str, display_name: str | None = None) -> InstrumentResponse:
        """POST a new instrument; ``display_name`` is sent only when truthy.

        The parameter name ``id`` shadows the builtin but is kept because it
        is part of the public signature.
        """
        payload: dict[str, Any] = {"id": id}
        if display_name:
            payload["display_name"] = display_name
        resp = self._request("POST", "/instruments", json=payload)
        return InstrumentResponse.model_validate(resp.json())

    def get_instrument(self, instrument_id: str) -> InstrumentDetailResponse:
        """GET ``/instruments/{instrument_id}``."""
        resp = self._request("GET", f"/instruments/{instrument_id}")
        return InstrumentDetailResponse.model_validate(resp.json())

    # ------------------------------------------------------------------
    # Watchers
    # ------------------------------------------------------------------

    def register_watcher(
        self,
        instrument_id: str,
        hostname: str | None = None,
        os_info: str | None = None,
    ) -> RegisterWatcherResponse:
        """POST ``/watchers/register`` for *instrument_id*.

        ``hostname`` and ``os_info`` are included only when truthy.
        """
        payload: dict[str, Any] = {"instrument_id": instrument_id}
        if hostname:
            payload["hostname"] = hostname
        if os_info:
            payload["os_info"] = os_info
        resp = self._request("POST", "/watchers/register", json=payload)
        return RegisterWatcherResponse.model_validate(resp.json())

    def push_config(
        self, watcher_id: str, config_yaml: str, checksum: str
    ) -> ConfigChecksumResponse:
        """PUT the watcher's config YAML together with its checksum."""
        resp = self._request(
            "PUT",
            f"/watchers/{watcher_id}/config",
            json={"config_yaml": config_yaml, "config_checksum": checksum},
        )
        return ConfigChecksumResponse.model_validate(resp.json())

    def get_config_checksum(self, watcher_id: str) -> ConfigChecksumResponse | None:
        """Return the remote checksum, or `None` if no config has been pushed.

        A 404 is expected for newly registered watchers that haven't pushed
        config yet — it is not an error condition. Any other `ApiError` is
        re-raised.
        """
        try:
            resp = self._request("GET", f"/watchers/{watcher_id}/config-checksum")
        except ApiError as exc:
            if exc.status_code == 404:
                return None
            raise
        return ConfigChecksumResponse.model_validate(resp.json())

    def send_heartbeat(self, watcher_id: str, payload: dict[str, Any]) -> HeartbeatResponse:
        """POST *payload* to ``/watchers/{watcher_id}/heartbeat``."""
        resp = self._request("POST", f"/watchers/{watcher_id}/heartbeat", json=payload)
        return HeartbeatResponse.model_validate(resp.json())

    def send_events(self, watcher_id: str, events: list[dict[str, Any]]) -> EventsResponse:
        """POST *events*, wrapped as ``{"events": [...]}``, for the watcher."""
        resp = self._request("POST", f"/watchers/{watcher_id}/events", json={"events": events})
        return EventsResponse.model_validate(resp.json())

    def get_update_info(self, watcher_id: str) -> WatcherUpdateInfoResponse:
        """Fetch server-reported watcher release metadata.

        Used by `self-update` and the in-process updater to decide whether
        the running watcher should upgrade itself.
        """
        resp = self._request("GET", f"/watchers/{watcher_id}/update-check")
        return WatcherUpdateInfoResponse.model_validate(resp.json())

    # ------------------------------------------------------------------
    # Runs
    # ------------------------------------------------------------------

    def report_run(self, instrument_id: str, run_data: dict[str, Any]) -> RunResponse:
        """POST *run_data* to ``/instruments/{instrument_id}/runs``."""
        resp = self._request("POST", f"/instruments/{instrument_id}/runs", json=run_data)
        return RunResponse.model_validate(resp.json())

    def update_run(
        self, instrument_id: str, run_id: str, data: dict[str, Any]
    ) -> RunDetailResponse:
        """PATCH an existing run with *data*."""
        resp = self._request("PATCH", f"/instruments/{instrument_id}/runs/{run_id}", json=data)
        return RunDetailResponse.model_validate(resp.json())

    # ------------------------------------------------------------------
    # Upload queue / files
    # ------------------------------------------------------------------

    def get_upload_queue(self, watcher_id: str) -> UploadQueueResponse:
        """GET ``/watchers/{watcher_id}/upload-queue``."""
        resp = self._request("GET", f"/watchers/{watcher_id}/upload-queue")
        return UploadQueueResponse.model_validate(resp.json())

    def request_upload_url(
        self,
        instrument_id: str,
        run_id: str,
        filename: str,
        content_type: str | None = None,
        size_bytes: int | None = None,
        file_created_at_ts: float | None = None,
    ) -> PresignedUploadResponse:
        """Request an upload URL for *filename* within a run.

        Args:
            instrument_id: Instrument owning the run.
            run_id: Run the file belongs to.
            filename: Name of the file to upload.
            content_type: Optional content type; omitted when falsy.
            size_bytes: Optional size; omitted only when ``None``.
            file_created_at_ts: Optional creation time as POSIX seconds;
                omitted only when ``None`` and sent as a UTC ISO 8601 string.
        """
        payload = self._build_upload_payload(
            filename, content_type, size_bytes, file_created_at_ts
        )
        resp = self._request(
            "POST",
            f"/instruments/{instrument_id}/runs/{run_id}/request-upload-url",
            json=payload,
        )
        return PresignedUploadResponse.model_validate(resp.json())

    def mark_file_uploaded(self, file_id: int, s3_info: dict[str, Any]) -> FileResponse:
        """PATCH ``/files/{file_id}`` with *s3_info*."""
        resp = self._request("PATCH", f"/files/{file_id}", json=s3_info)
        return FileResponse.model_validate(resp.json())