mercuto-client 0.2.5__tar.gz → 0.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mercuto-client might be problematic. Click here for more details.
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/PKG-INFO +1 -1
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/ingester/__main__.py +3 -127
- mercuto_client-0.2.7/mercuto_client/ingester/mercuto.py +155 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client.egg-info/PKG-INFO +1 -1
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client.egg-info/SOURCES.txt +1 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/pyproject.toml +1 -1
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/LICENSE +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/README.md +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/__init__.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/_tests/__init__.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/_tests/conftest.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/_tests/test_ingester/__init__.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/_tests/test_ingester/test_file_processor.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/_tests/test_ingester/test_ftp.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/_tests/test_ingester/test_parsers.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/_tests/test_mocking.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/_util.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/acl.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/client.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/exceptions.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/ingester/__init__.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/ingester/ftp.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/ingester/parsers/__init__.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/ingester/parsers/campbell.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/ingester/parsers/generic_csv.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/ingester/parsers/worldsensing.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/ingester/processor.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/ingester/util.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/mocks.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/py.typed +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/types.py +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client.egg-info/dependency_links.txt +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client.egg-info/requires.txt +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client.egg-info/top_level.txt +0 -0
- {mercuto_client-0.2.5 → mercuto_client-0.2.7}/setup.cfg +0 -0
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
import argparse
|
|
2
|
-
import fnmatch
|
|
3
|
-
import itertools
|
|
4
2
|
import logging
|
|
5
3
|
import logging.handlers
|
|
6
4
|
import os
|
|
@@ -10,135 +8,13 @@ from typing import Callable, TypeVar
|
|
|
10
8
|
|
|
11
9
|
import schedule
|
|
12
10
|
|
|
13
|
-
from .. import MercutoClient, MercutoHTTPException
|
|
14
|
-
from ..types import DataSample
|
|
15
11
|
from .ftp import simple_ftp_server
|
|
16
|
-
from .
|
|
12
|
+
from .mercuto import MercutoIngester
|
|
17
13
|
from .processor import FileProcessor
|
|
18
|
-
from .util import
|
|
14
|
+
from .util import get_free_space_excluding_files
|
|
19
15
|
|
|
20
16
|
logger = logging.getLogger(__name__)
|
|
21
17
|
|
|
22
|
-
NON_RETRYABLE_ERRORS = {400, 404, 409} # HTTP status codes that indicate non-retryable errors
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class MercutoIngester:
|
|
26
|
-
def __init__(self, project_code: str, api_key: str, hostname: str = 'https://api.rockfieldcloud.com.au') -> None:
|
|
27
|
-
self._client = MercutoClient(url=hostname)
|
|
28
|
-
self._api_key = api_key
|
|
29
|
-
with self._client.as_credentials(api_key=api_key) as client:
|
|
30
|
-
self._project = client.projects().get_project(project_code)
|
|
31
|
-
assert self._project['code'] == project_code
|
|
32
|
-
|
|
33
|
-
self._secondary_channels = client.channels().get_channels(project_code, classification='SECONDARY')
|
|
34
|
-
self._datatables = list(itertools.chain.from_iterable([dt['datatables'] for dt in client.devices().list_dataloggers(project_code)]))
|
|
35
|
-
|
|
36
|
-
self._channel_map = {c['label']: c['code'] for c in self._secondary_channels}
|
|
37
|
-
|
|
38
|
-
def update_mapping(self, mapping: dict[str, str]) -> None:
|
|
39
|
-
"""
|
|
40
|
-
Update the channel label to channel code mapping.
|
|
41
|
-
"""
|
|
42
|
-
self._channel_map.update(mapping)
|
|
43
|
-
logger.info(f"Updated channel mapping: {self._channel_map}")
|
|
44
|
-
|
|
45
|
-
@property
|
|
46
|
-
def project_code(self) -> str:
|
|
47
|
-
return self._project['code']
|
|
48
|
-
|
|
49
|
-
def ping(self) -> None:
|
|
50
|
-
"""
|
|
51
|
-
Ping the Mercuto serverto update the last seen IP address.
|
|
52
|
-
"""
|
|
53
|
-
ip = get_my_public_ip()
|
|
54
|
-
with self._client.as_credentials(api_key=self._api_key) as client:
|
|
55
|
-
client.projects().ping_project(self.project_code, ip_address=ip)
|
|
56
|
-
logging.info(f"Pinged Mercuto server from IP: {ip} for project: {self.project_code}")
|
|
57
|
-
|
|
58
|
-
def matching_datatable(self, filename: str) -> str | None:
|
|
59
|
-
"""
|
|
60
|
-
Check if any datatables on the project match this file name.
|
|
61
|
-
Returns the datatable code if a match is found, otherwise None.
|
|
62
|
-
"""
|
|
63
|
-
basename = os.path.basename(filename)
|
|
64
|
-
|
|
65
|
-
def matches(test: str) -> bool:
|
|
66
|
-
"""
|
|
67
|
-
test should be a pattern or a filename.
|
|
68
|
-
E.g. "my_data.csv" or "my_data*.csv", or "/path/to/my_data*.csv"
|
|
69
|
-
Do wildcard matching as well as prefix matching.
|
|
70
|
-
"""
|
|
71
|
-
test_base = os.path.basename(test)
|
|
72
|
-
if fnmatch.fnmatch(basename, test_base):
|
|
73
|
-
return True
|
|
74
|
-
lhs, _ = os.path.splitext(test_base)
|
|
75
|
-
if basename.startswith(lhs):
|
|
76
|
-
return True
|
|
77
|
-
return False
|
|
78
|
-
|
|
79
|
-
for dt in self._datatables:
|
|
80
|
-
# Match using datatable pattern
|
|
81
|
-
if matches(dt['name']):
|
|
82
|
-
return dt['code']
|
|
83
|
-
if dt['src'] and matches(dt['src']):
|
|
84
|
-
return dt['code']
|
|
85
|
-
return None
|
|
86
|
-
|
|
87
|
-
def _upload_samples(self, samples: list[DataSample]) -> bool:
|
|
88
|
-
"""
|
|
89
|
-
Upload samples to the Mercuto project.
|
|
90
|
-
"""
|
|
91
|
-
try:
|
|
92
|
-
with self._client.as_credentials(api_key=self._api_key) as client:
|
|
93
|
-
for batch in batched(samples, 500):
|
|
94
|
-
client.data().upload_samples(batch)
|
|
95
|
-
return True
|
|
96
|
-
except MercutoHTTPException as e:
|
|
97
|
-
if e.status_code in NON_RETRYABLE_ERRORS:
|
|
98
|
-
logger.exception(
|
|
99
|
-
"Error indicates bad file that should not be retried. Skipping.")
|
|
100
|
-
return True
|
|
101
|
-
else:
|
|
102
|
-
return False
|
|
103
|
-
|
|
104
|
-
def _upload_file(self, file_path: str, datatable_code: str) -> bool:
|
|
105
|
-
"""
|
|
106
|
-
Upload a file to the Mercuto project.
|
|
107
|
-
"""
|
|
108
|
-
logging.info(f"Uploadeding file {file_path} to datatable {datatable_code} in project {self.project_code}")
|
|
109
|
-
try:
|
|
110
|
-
with self._client.as_credentials(api_key=self._api_key) as client:
|
|
111
|
-
client.data().upload_file(
|
|
112
|
-
project=self.project_code,
|
|
113
|
-
datatable=datatable_code,
|
|
114
|
-
file=file_path,
|
|
115
|
-
)
|
|
116
|
-
return True
|
|
117
|
-
except MercutoHTTPException as e:
|
|
118
|
-
if e.status_code in NON_RETRYABLE_ERRORS:
|
|
119
|
-
logger.exception(
|
|
120
|
-
"Error indicates bad file that should not be retried. Skipping.")
|
|
121
|
-
return True
|
|
122
|
-
else:
|
|
123
|
-
return False
|
|
124
|
-
|
|
125
|
-
def process_file(self, file_path: str) -> bool:
|
|
126
|
-
"""
|
|
127
|
-
Process the received file.
|
|
128
|
-
"""
|
|
129
|
-
logging.info(f"Processing file: {file_path}")
|
|
130
|
-
datatable_code = self.matching_datatable(file_path)
|
|
131
|
-
if datatable_code:
|
|
132
|
-
logger.info(f"Matched datatable code: {datatable_code} for file: {file_path}")
|
|
133
|
-
return self._upload_file(file_path, datatable_code)
|
|
134
|
-
else:
|
|
135
|
-
parser = detect_parser(file_path)
|
|
136
|
-
samples = parser(file_path, self._channel_map)
|
|
137
|
-
if not samples:
|
|
138
|
-
logging.warning(f"No samples found in file: {file_path}")
|
|
139
|
-
return True
|
|
140
|
-
return self._upload_samples(samples)
|
|
141
|
-
|
|
142
18
|
|
|
143
19
|
T = TypeVar('T')
|
|
144
20
|
|
|
@@ -277,7 +153,7 @@ if __name__ == '__main__':
|
|
|
277
153
|
with simple_ftp_server(directory=buffer_directory,
|
|
278
154
|
username=args.username, password=args.password, port=args.port,
|
|
279
155
|
callback=processor.add_file_to_db, rename=not args.no_rename,
|
|
280
|
-
workdir=
|
|
156
|
+
workdir=ftp_dir):
|
|
281
157
|
schedule.every(60).seconds.do(call_and_log_error, ingester.ping)
|
|
282
158
|
schedule.every(5).seconds.do(call_and_log_error, processor.process_next_file)
|
|
283
159
|
schedule.every(2).minutes.do(call_and_log_error, processor.cleanup_old_files)
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import fnmatch
|
|
2
|
+
import itertools
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from .. import MercutoClient, MercutoHTTPException
|
|
8
|
+
from ..types import Channel, DataSample, DatatableOut, Project
|
|
9
|
+
from .parsers import detect_parser
|
|
10
|
+
from .util import batched, get_my_public_ip
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
NON_RETRYABLE_ERRORS = {400, 404, 409} # HTTP status codes that indicate non-retryable errors
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MercutoIngester:
|
|
18
|
+
def __init__(self, project_code: str, api_key: str, hostname: str = 'https://api.rockfieldcloud.com.au') -> None:
|
|
19
|
+
self._client = MercutoClient(url=hostname)
|
|
20
|
+
self._api_key = api_key
|
|
21
|
+
self._project_code = project_code
|
|
22
|
+
|
|
23
|
+
self._project: Optional[Project] = None
|
|
24
|
+
self._secondary_channels: Optional[list[Channel]] = None
|
|
25
|
+
self._datatables: Optional[list[DatatableOut]] = None
|
|
26
|
+
|
|
27
|
+
self._channel_map: dict[str, str] = {}
|
|
28
|
+
|
|
29
|
+
def _refresh_mercuto_data(self) -> None:
|
|
30
|
+
with self._client.as_credentials(api_key=self._api_key) as client:
|
|
31
|
+
self._project = client.projects().get_project(self._project_code)
|
|
32
|
+
assert self._project['code'] == self._project_code
|
|
33
|
+
|
|
34
|
+
self._secondary_channels = client.channels().get_channels(self._project_code, classification='SECONDARY')
|
|
35
|
+
self._datatables = list(itertools.chain.from_iterable([dt['datatables'] for dt in client.devices().list_dataloggers(self._project_code)]))
|
|
36
|
+
|
|
37
|
+
self._channel_map.update({c['label']: c['code'] for c in self._secondary_channels})
|
|
38
|
+
|
|
39
|
+
def _can_process(self) -> bool:
|
|
40
|
+
return self._project is not None and self._secondary_channels is not None and self._datatables is not None
|
|
41
|
+
|
|
42
|
+
def update_mapping(self, mapping: dict[str, str]) -> None:
|
|
43
|
+
"""
|
|
44
|
+
Update the channel label to channel code mapping.
|
|
45
|
+
"""
|
|
46
|
+
self._channel_map.update(mapping)
|
|
47
|
+
logger.info(f"Updated channel mapping: {self._channel_map}")
|
|
48
|
+
|
|
49
|
+
@property
|
|
50
|
+
def project_code(self) -> str:
|
|
51
|
+
return self._project_code
|
|
52
|
+
|
|
53
|
+
def ping(self) -> None:
|
|
54
|
+
"""
|
|
55
|
+
Ping the Mercuto serverto update the last seen IP address.
|
|
56
|
+
"""
|
|
57
|
+
ip = get_my_public_ip()
|
|
58
|
+
with self._client.as_credentials(api_key=self._api_key) as client:
|
|
59
|
+
client.projects().ping_project(self.project_code, ip_address=ip)
|
|
60
|
+
logging.info(f"Pinged Mercuto server from IP: {ip} for project: {self.project_code}")
|
|
61
|
+
|
|
62
|
+
def matching_datatable(self, filename: str) -> str | None:
|
|
63
|
+
"""
|
|
64
|
+
Check if any datatables on the project match this file name.
|
|
65
|
+
Returns the datatable code if a match is found, otherwise None.
|
|
66
|
+
"""
|
|
67
|
+
if self._datatables is None:
|
|
68
|
+
raise ValueError("Datatables not loaded. Call _refresh_mercuto_data() first.")
|
|
69
|
+
|
|
70
|
+
basename = os.path.basename(filename)
|
|
71
|
+
|
|
72
|
+
def matches(test: str) -> bool:
|
|
73
|
+
"""
|
|
74
|
+
test should be a pattern or a filename.
|
|
75
|
+
E.g. "my_data.csv" or "my_data*.csv", or "/path/to/my_data*.csv"
|
|
76
|
+
Do wildcard matching as well as prefix matching.
|
|
77
|
+
"""
|
|
78
|
+
test_base = os.path.basename(test)
|
|
79
|
+
if fnmatch.fnmatch(basename, test_base):
|
|
80
|
+
return True
|
|
81
|
+
lhs, _ = os.path.splitext(test_base)
|
|
82
|
+
if basename.startswith(lhs):
|
|
83
|
+
return True
|
|
84
|
+
return False
|
|
85
|
+
|
|
86
|
+
for dt in self._datatables:
|
|
87
|
+
# Match using datatable pattern
|
|
88
|
+
if matches(dt['name']):
|
|
89
|
+
return dt['code']
|
|
90
|
+
if dt['src'] and matches(dt['src']):
|
|
91
|
+
return dt['code']
|
|
92
|
+
return None
|
|
93
|
+
|
|
94
|
+
def _upload_samples(self, samples: list[DataSample]) -> bool:
|
|
95
|
+
"""
|
|
96
|
+
Upload samples to the Mercuto project.
|
|
97
|
+
"""
|
|
98
|
+
try:
|
|
99
|
+
with self._client.as_credentials(api_key=self._api_key) as client:
|
|
100
|
+
for batch in batched(samples, 500):
|
|
101
|
+
client.data().upload_samples(batch)
|
|
102
|
+
return True
|
|
103
|
+
except MercutoHTTPException as e:
|
|
104
|
+
if e.status_code in NON_RETRYABLE_ERRORS:
|
|
105
|
+
logger.exception(
|
|
106
|
+
"Error indicates bad file that should not be retried. Skipping.")
|
|
107
|
+
return True
|
|
108
|
+
else:
|
|
109
|
+
return False
|
|
110
|
+
|
|
111
|
+
def _upload_file(self, file_path: str, datatable_code: str) -> bool:
|
|
112
|
+
"""
|
|
113
|
+
Upload a file to the Mercuto project.
|
|
114
|
+
"""
|
|
115
|
+
logging.info(f"Uploadeding file {file_path} to datatable {datatable_code} in project {self.project_code}")
|
|
116
|
+
try:
|
|
117
|
+
with self._client.as_credentials(api_key=self._api_key) as client:
|
|
118
|
+
client.data().upload_file(
|
|
119
|
+
project=self.project_code,
|
|
120
|
+
datatable=datatable_code,
|
|
121
|
+
file=file_path,
|
|
122
|
+
)
|
|
123
|
+
return True
|
|
124
|
+
except MercutoHTTPException as e:
|
|
125
|
+
if e.status_code in NON_RETRYABLE_ERRORS:
|
|
126
|
+
logger.exception(
|
|
127
|
+
"Error indicates bad file that should not be retried. Skipping.")
|
|
128
|
+
return True
|
|
129
|
+
else:
|
|
130
|
+
return False
|
|
131
|
+
|
|
132
|
+
def process_file(self, file_path: str) -> bool:
|
|
133
|
+
"""
|
|
134
|
+
Process the received file.
|
|
135
|
+
"""
|
|
136
|
+
|
|
137
|
+
if not self._can_process():
|
|
138
|
+
logging.info("Refreshing Mercuto data...")
|
|
139
|
+
self._refresh_mercuto_data()
|
|
140
|
+
if not self._can_process():
|
|
141
|
+
logging.error("Failed to refresh Mercuto data. Cannot process file yet.")
|
|
142
|
+
return False
|
|
143
|
+
|
|
144
|
+
logging.info(f"Processing file: {file_path}")
|
|
145
|
+
datatable_code = self.matching_datatable(file_path)
|
|
146
|
+
if datatable_code:
|
|
147
|
+
logger.info(f"Matched datatable code: {datatable_code} for file: {file_path}")
|
|
148
|
+
return self._upload_file(file_path, datatable_code)
|
|
149
|
+
else:
|
|
150
|
+
parser = detect_parser(file_path)
|
|
151
|
+
samples = parser(file_path, self._channel_map)
|
|
152
|
+
if not samples:
|
|
153
|
+
logging.warning(f"No samples found in file: {file_path}")
|
|
154
|
+
return True
|
|
155
|
+
return self._upload_samples(samples)
|
|
@@ -24,6 +24,7 @@ mercuto_client/_tests/test_ingester/test_parsers.py
|
|
|
24
24
|
mercuto_client/ingester/__init__.py
|
|
25
25
|
mercuto_client/ingester/__main__.py
|
|
26
26
|
mercuto_client/ingester/ftp.py
|
|
27
|
+
mercuto_client/ingester/mercuto.py
|
|
27
28
|
mercuto_client/ingester/processor.py
|
|
28
29
|
mercuto_client/ingester/util.py
|
|
29
30
|
mercuto_client/ingester/parsers/__init__.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/_tests/test_ingester/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/_tests/test_ingester/test_ftp.py
RENAMED
|
File without changes
|
{mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/_tests/test_ingester/test_parsers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/ingester/parsers/generic_csv.py
RENAMED
|
File without changes
|
{mercuto_client-0.2.5 → mercuto_client-0.2.7}/mercuto_client/ingester/parsers/worldsensing.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|