nefino-geosync 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,199 @@
1
+ import json
2
+ import os
3
+ import re
4
+ from .storage import get_app_directory
5
+ from datetime import datetime, timezone
6
+ from typing import Dict, Set
7
+
8
+
9
class Journal:
    """Handles metadata about analyses for efficient downloading.

    Persists several small JSON files under the application directory so that
    later runs can skip layers and analyses that were already processed.
    """

    # This is a singleton class. There should only be one instance of Journal.
    _instance = None

    @classmethod
    def singleton(cls):
        """Returns the singleton instance of Journal, creating it on first use."""
        if not cls._instance:
            cls._instance = Journal()
        return cls._instance

    def __init__(self) -> None:
        if Journal._instance:
            raise Exception('Journal is a singleton class. Use Journal.singleton() to get the instance.')
        # Mapping from analysis pk to the state where the analysis was started
        self.analysis_states: Dict[str, str] = dict()

        # Mapping from analysis pk to the layers that were requested
        self.analysis_requested_layers: Dict[str, Set[str]] = dict()

        # Mapping from layer name to where it was last updated and when.
        # Inner dict maps state code -> timestamp (may be None).
        self.layer_last_updates: Dict[str, Dict[str, datetime]] = dict()

        # Record which analyses have been successfully started
        self.synced_analyses: Set[str] = set()

        # Track the last successful geosync run for changelog queries.
        # None until the first successful run has been recorded.
        self.last_geosync_run: 'datetime | None' = None

        self.load_analysis_states()
        self.load_analysis_requested_layers()
        self.load_layer_last_updates()
        self.load_synced_analyses()
        self.load_last_geosync_run()

    def _journal_file(self, name: str) -> str:
        """Returns the absolute path of a journal file inside the app directory."""
        return os.path.join(get_app_directory(), name)

    def save_analysis_states(self) -> None:
        """Saves the analysis states to a file."""
        with open(self._journal_file('analysis_states.json'), 'w') as f:
            json.dump(self.analysis_states, f)

    def load_analysis_states(self) -> None:
        """Loads the analysis states from a file."""
        try:
            with open(self._journal_file('analysis_states.json'), 'r') as f:
                self.analysis_states = json.load(f)
        except FileNotFoundError:
            # we already have an empty dictionary as the field value
            print('No saved analysis states found.')

    def save_analysis_requested_layers(self) -> None:
        """Saves the analysis requested layers to a file."""
        # Convert sets to lists for JSON serialization
        serializable_data = {pk: list(layers) for pk, layers in self.analysis_requested_layers.items()}
        with open(self._journal_file('analysis_requested_layers.json'), 'w') as f:
            json.dump(serializable_data, f)

    def load_analysis_requested_layers(self) -> None:
        """Loads the analysis requested layers from a file."""
        try:
            with open(self._journal_file('analysis_requested_layers.json'), 'r') as f:
                data = json.load(f)
            # Convert lists back to sets
            self.analysis_requested_layers = {pk: set(layers) for pk, layers in data.items()}
        except FileNotFoundError:
            # we already have an empty dictionary as the field value
            print('No saved analysis requested layers found.')

    def save_layer_last_updates(self) -> None:
        """Saves the layer last updates to a file."""
        with open(self._journal_file('layer_last_updates.json'), 'w') as f:
            # default= handles the datetime values; plain None serializes natively.
            json.dump(self.layer_last_updates, f, default=lambda x: x.isoformat())

    def load_layer_last_updates(self) -> None:
        """Loads the layer last updates from a file, reviving ISO timestamps."""
        try:
            with open(self._journal_file('layer_last_updates.json'), 'r') as f:
                self.layer_last_updates = json.load(f)
            for cluster in self.layer_last_updates.values():
                for state, timestamp in cluster.items():
                    cluster[state] = datetime.fromisoformat(timestamp) if timestamp else None
        except FileNotFoundError:
            # we already have an empty dictionary as the field value
            print('No saved layer last updates found.')

    def save_synced_analyses(self) -> None:
        """Saves the list of processed analyses to a file."""
        with open(self._journal_file('synced_analyses.json'), 'w') as f:
            json.dump(list(self.synced_analyses), f)

    def load_synced_analyses(self) -> None:
        """Loads the list of processed analyses from a file."""
        try:
            with open(self._journal_file('synced_analyses.json'), 'r') as f:
                self.synced_analyses = set(json.load(f))
        except FileNotFoundError:
            # we already have an empty set as the field value
            print('No saved downloaded analyses found.')

    def save_last_geosync_run(self) -> None:
        """Saves the timestamp of the last successful geosync run."""
        with open(self._journal_file('last_geosync_run.json'), 'w') as f:
            json.dump(self.last_geosync_run.isoformat() if self.last_geosync_run else None, f)

    def load_last_geosync_run(self) -> None:
        """Loads the timestamp of the last successful geosync run."""
        try:
            with open(self._journal_file('last_geosync_run.json'), 'r') as f:
                timestamp_str = json.load(f)
            self.last_geosync_run = datetime.fromisoformat(timestamp_str) if timestamp_str else None
        except FileNotFoundError:
            # we already have None as the field value
            print('No saved last geosync run timestamp found.')

    def record_successful_geosync_run(self, start_time: datetime) -> None:
        """Records and persists *start_time* as the last successful geosync run."""
        self.last_geosync_run = start_time
        self.save_last_geosync_run()

    def record_analyses_requested(self, start_analyses_result, analysis_inputs) -> None:
        """Records the analyses that have been started, where they were started, and which layers were requested.

        GraphQL aliases of the form ``startAnalysis_<state>`` carry the German
        state code (DE1..DE9, DEA..DEG); other aliases are ignored.
        """
        pattern = r'^startAnalysis_(?P<state>DE[1-9A-G])$'
        for alias, analysis_metadata in start_analyses_result.__dict__.items():
            match = re.match(pattern, alias)
            if not match:
                continue
            state = match.group('state')
            # record where the analysis was started
            self.analysis_states[analysis_metadata.pk] = state
            # record which layers were requested
            requested_layers = set()
            for request in analysis_inputs[state].specs.requests:
                for layer in request.layers:
                    requested_layers.add(layer.layer_name)
            self.analysis_requested_layers[analysis_metadata.pk] = requested_layers
        self.save_analysis_states()
        self.save_analysis_requested_layers()

    def clear_analysis_requested_layers(self) -> None:
        """Clears all analysis requested layers at the start of a new run."""
        if self.analysis_requested_layers:
            print(
                f"Clearing {len(self.analysis_requested_layers)} old analysis metadata entries from previous runs"
            )
            self.analysis_requested_layers.clear()
            self.save_analysis_requested_layers()

    def record_layers_unpacked(self, layers: Set[str], state: str, started_at: datetime) -> None:
        """Records the layers that have been unpacked, and when they were last updated."""
        print(f'Recording layers {layers} as unpacked for state {state}')

        for layer in layers:
            if layer not in self.layer_last_updates:
                self.layer_last_updates[layer] = dict()
            self.layer_last_updates[layer][state] = started_at
        self.save_layer_last_updates()

    def get_state_for_analysis(self, pk: str) -> str:
        """Returns the state where the analysis was started.

        Raises KeyError if the analysis pk was never recorded.
        """
        return self.analysis_states[pk]

    def is_newer_than_saved(self, layer: str, state: str, timestamp: datetime) -> bool:
        """Checks if the layer needs to be unpacked.

        Returns True when no timestamp is recorded for (layer, state) or when
        *timestamp* is strictly newer than the recorded one.
        """
        if layer not in self.layer_last_updates:
            return True
        if state not in self.layer_last_updates[layer]:
            return True
        if not self.layer_last_updates[layer][state]:
            return True

        saved_timestamp = self.layer_last_updates[layer][state]

        # Handle timezone comparison issues by making both timezone-aware
        if saved_timestamp.tzinfo is None and timestamp.tzinfo is not None:
            # Assume saved timestamp is UTC if it has no timezone info
            saved_timestamp = saved_timestamp.replace(tzinfo=timezone.utc)
        elif saved_timestamp.tzinfo is not None and timestamp.tzinfo is None:
            # Make the API timestamp timezone-aware (assume UTC)
            timestamp = timestamp.replace(tzinfo=timezone.utc)

        return saved_timestamp < timestamp

    def record_analysis_synced(self, pk: str) -> None:
        """Records that the analysis has been downloaded and unpacked."""
        self.synced_analyses.add(pk)
        self.save_synced_analyses()
        # Clean up the requested layers for this analysis to prevent unbounded growth
        if pk in self.analysis_requested_layers:
            del self.analysis_requested_layers[pk]
            self.save_analysis_requested_layers()
@@ -0,0 +1,240 @@
1
+ """Module for querying and logging layer changelog information."""
2
+
3
+ import csv
4
+ import os
5
+ from .api_client import general_availability_operation, layer_changelog_operation
6
+ from .config import Config
7
+ from .graphql_errors import check_errors
8
+ from .journal import Journal
9
+ from .schema import LayerChangelogEntry
10
+ from datetime import datetime, timezone
11
+ from nefino_geosync.access_rule_filter import AccessRuleFilter
12
+ from sgqlc.endpoint.http import HTTPEndpoint
13
+ from typing import Any
14
+
15
+ LayerChangelogResult = Any
16
+
17
+
18
def query_layer_changelog(client: 'HTTPEndpoint', timestamp_start: 'str | None' = None) -> 'LayerChangelogResult':
    """Queries the layer changelog from the GraphQL API.

    :param client: endpoint used to execute the GraphQL operation.
    :param timestamp_start: ISO-8601 timestamp; only changes at or after this
        moment are requested. None queries the full changelog.
    :raises: whatever ``check_errors`` raises when the API reports an error.
    """
    changelog_op = layer_changelog_operation(timestamp_start)
    changelog_data = client(changelog_op)
    check_errors(changelog_data, 'Failed to fetch layer changelog')
    # sgqlc convention: op + raw data yields a typed result object.
    return changelog_op + changelog_data
24
+
25
+
26
def record_layer_changes_since_last_run(client: 'HTTPEndpoint') -> 'LayerChangelogResult':
    """Logs all layer changes since the last successful geosync run and returns the changelog data.

    Returns None when there is no previous run to compare against or when the
    changelog query itself fails; cluster-filter failures degrade to showing
    all entries instead of aborting.
    """
    journal = Journal.singleton()

    # Get the timestamp of the last successful run
    timestamp_start = None
    if journal.last_geosync_run:
        # The API expects ISO-8601 with microseconds and a colon-separated
        # offset, e.g. 2025-09-19T10:30:20.383210+00:00.
        if journal.last_geosync_run.tzinfo is None:
            # If timezone-naive, assume UTC
            aware_timestamp = journal.last_geosync_run.replace(tzinfo=timezone.utc)
        else:
            aware_timestamp = journal.last_geosync_run

        # isoformat() already emits the colon in the offset (+00:00);
        # timespec='microseconds' keeps the fractional part even when it is 0.
        timestamp_start = aware_timestamp.isoformat(timespec='microseconds')

        print(f'Checking for layer changes since last geosync run: {timestamp_start}')
    else:
        print('No previous geosync run found, skipping changelog check')
        return None

    # Get available clusters to filter changelog results
    try:
        # First get general availability to determine accessible clusters
        general_op = general_availability_operation()
        general_data = client(general_op)
        check_errors(general_data, 'Failed to fetch general availability for changelog filtering')
        general_availability = general_op + general_data

        # Use AccessRuleFilter to determine accessible clusters
        rules = AccessRuleFilter(general_availability.access_rules)

        # Get all places from access rules to check against
        all_places = set()
        for rule in general_availability.access_rules:
            all_places.update(rule.places)

        accessible_clusters = {
            cluster.name
            for cluster in general_availability.clusters
            if cluster.has_access and any(rules.check(place, cluster.name) for place in all_places)
        }
        print(f'Accessible clusters: {accessible_clusters}')

    except Exception as e:
        # Best effort: without cluster info, fall back to showing everything.
        print(f'Failed to fetch accessible clusters, showing all changelog entries: {e}')
        accessible_clusters = set()  # Empty set means show all

    # Query the changelog
    try:
        changelog_result = query_layer_changelog(client, timestamp_start)
        log_changelog_entries(changelog_result, accessible_clusters)
        return changelog_result
    except Exception as e:
        print(f'Failed to retrieve layer changelog: {e}')
        return None
86
+
87
+
88
def log_changelog_entries(changelog_result: 'LayerChangelogResult', accessible_clusters: 'set | None' = None) -> None:
    """Logs changelog entries, focusing on relevant changes and filtering by accessible clusters.

    An empty (or None) ``accessible_clusters`` disables the cluster filter and
    shows every entry. Entries that pass the filter are also saved to CSV.
    """
    if not hasattr(changelog_result, 'layer_changelog'):
        print('No layer changelog data received')
        return

    changelog_entries = changelog_result.layer_changelog
    if not changelog_entries:
        print('✅ No layer changes detected for accessible clusters since last run')
        return

    # Filter entries by accessible clusters and collect those with relevant changes
    relevant_entries = []
    for entry in changelog_entries:
        # Skip if not in accessible clusters
        cluster_name = getattr(entry, 'cluster_name', None)
        if accessible_clusters and (not cluster_name or cluster_name not in accessible_clusters):
            continue

        # Check for relevant changes
        relevant_changes = _get_relevant_changes(entry)
        if relevant_changes:
            relevant_entries.append((entry, relevant_changes))

    # If no relevant changes found, show success message
    if not relevant_entries:
        print('✅ No layer changes detected for accessible clusters since last run')
        return

    print(f'📋 Found {len(relevant_entries)} layer change(s) for accessible clusters since last run:')

    for entry, relevant_changes in relevant_entries:
        _log_entry_details(entry, relevant_changes)

    # Save to CSV (use original filtered entries for CSV)
    save_changelog_to_csv([entry for entry, _ in relevant_entries])
125
+
126
+
127
+ def _get_relevant_changes(entry: LayerChangelogEntry) -> list:
128
+ """Extract relevant changes from a changelog entry."""
129
+ relevant_changes = []
130
+ if hasattr(entry, 'changed_fields') and entry.changed_fields:
131
+ for field in entry.changed_fields:
132
+ if field in ['attributes', 'layer_name', 'cluster_name']:
133
+ relevant_changes.append(field)
134
+ return relevant_changes
135
+
136
+
137
+ def _log_entry_details(entry: LayerChangelogEntry, relevant_changes: list) -> None:
138
+ """Log details for a single changelog entry."""
139
+ layer_name = getattr(entry, 'layer_name', 'Unknown')
140
+ cluster_name = getattr(entry, 'cluster_name', 'Unknown')
141
+ action = getattr(entry, 'action', 'Unknown')
142
+ timestamp = getattr(entry, 'timestamp', 'Unknown')
143
+
144
+ print(f" 📦 Layer '{layer_name}' (cluster: {cluster_name})")
145
+ print(f' Action: {action}')
146
+ print(f' Changed fields: {", ".join(relevant_changes)}')
147
+ print(f' Timestamp: {timestamp}')
148
+
149
+ # If attributes changed, log the attributes
150
+ if 'attributes' in relevant_changes and hasattr(entry, 'attributes') and entry.attributes:
151
+ print(f' New attributes: {", ".join(entry.attributes)}')
152
+
153
+ print('') # Empty line for readability
154
+
155
+
156
def save_changelog_to_csv(filtered_entries: list) -> None:
    """Saves changelog entries to a CSV file in the output directory.

    Entries without relevant changes are skipped. Failures are only logged,
    never raised, so a CSV problem cannot abort a sync run. Does nothing for
    an empty entry list.
    """
    if not filtered_entries:
        return

    config = Config.singleton()
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    csv_filename = f'layer_changelog_{timestamp}.csv'
    csv_path = os.path.join(config.output_path, csv_filename)

    # Ensure output directory exists
    os.makedirs(config.output_path, exist_ok=True)

    # Define CSV headers
    headers = ['timestamp', 'layer_name', 'cluster_name', 'action', 'changed_fields', 'attributes']

    try:
        with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=headers)
            writer.writeheader()

            for entry in filtered_entries:
                # Reuse the shared filter so CSV and log output always agree.
                relevant_changes = _get_relevant_changes(entry)
                if not relevant_changes:
                    continue  # Skip entries without relevant changes

                attributes = ''
                if getattr(entry, 'attributes', None):
                    attributes = ', '.join(entry.attributes)

                writer.writerow(
                    {
                        'timestamp': getattr(entry, 'timestamp', 'Unknown'),
                        'layer_name': getattr(entry, 'layer_name', 'Unknown'),
                        'cluster_name': getattr(entry, 'cluster_name', 'Unknown'),
                        'action': getattr(entry, 'action', 'Unknown'),
                        'changed_fields': ', '.join(relevant_changes),
                        'attributes': attributes,
                    }
                )

        print(f'📊 Changelog saved to CSV: {csv_path}')
    except Exception as e:
        print(f'⚠️ Failed to save changelog to CSV: {e}')
211
+
212
+
213
def record_successful_geosync_completion(start_time: datetime) -> None:
    """Records that a geosync run completed successfully.

    Persists *start_time* (when the run began) so the next run's changelog
    query covers everything that happened during and after this run.
    """
    Journal.singleton().record_successful_geosync_run(start_time)
    print('✅ Geosync completed successfully')
218
+
219
+
220
def layer_has_relevant_changes_in_changelog(
    changelog_result: 'LayerChangelogResult', layer_name: str, cluster_name: str
) -> bool:
    """Check if a layer has relevant changes in the provided changelog data.

    A change is relevant when the matching (layer, cluster) entry lists
    'attributes', 'layer_name' or 'cluster_name' among its changed fields.
    """
    entries = getattr(changelog_result, 'layer_changelog', None) if changelog_result else None
    if not entries:
        return False

    watched = ('attributes', 'layer_name', 'cluster_name')
    for entry in entries:
        if getattr(entry, 'layer_name', None) != layer_name:
            continue
        if getattr(entry, 'cluster_name', None) != cluster_name:
            continue
        changed_fields = getattr(entry, 'changed_fields', None) or []
        if any(field in watched for field in changed_fields):
            return True

    return False
@@ -0,0 +1,15 @@
1
+ import argparse
2
+
3
def parse_args(cached=[], argv=None):
    """Parse command-line arguments for the geosync CLI.

    NOTE: the mutable default ``cached`` is used deliberately as a cross-call
    memo, so repeated calls return the same parsed namespace without touching
    argparse again.

    :param cached: memo list; pass a fresh list to force re-parsing.
    :param argv: explicit argument list (mainly for testing); None parses
        ``sys.argv`` as argparse does by default.
    :return: the parsed ``argparse.Namespace``.
    """
    if len(cached) > 0:
        return cached[0]
    parser = argparse.ArgumentParser(
        prog="Nefino GeoSync",
        description='Download available geodata from the Nefino API.',
        epilog='If you have further questions please reach out to us! The maintainers for this tool can be found on https://github.com/nefino/geosync-py.')
    parser.add_argument('-c', '--configure', action='store_true', help='Edit your existing configuration. The first-run wizard will be shown again, with your existing configuration pre-filled.')
    parser.add_argument('-r', '--resume', action='store_true', help='Resume checking for completed analyses and downloading them. This will skip the analysis start step.')
    parser.add_argument('-v', '--verbose', action='store_true', help='Print more information to the console.')
    args = parser.parse_args(argv)
    cached.append(args)
    return args
nefino_geosync/run.py ADDED
@@ -0,0 +1,59 @@
1
+ """This is the main entry point of the application."""
2
+
3
+ import atexit
4
+ import os
5
+ from .api_client import get_client
6
+ from .config import Config
7
+ from .download_completed_analyses import download_completed_analyses
8
+ from .layer_changelog import (
9
+ record_layer_changes_since_last_run,
10
+ record_successful_geosync_completion,
11
+ )
12
+ from .parse_args import parse_args
13
+ from .session_logger import start_session_logging, stop_session_logging
14
+ from .start_analyses import start_analyses
15
+ from datetime import UTC, datetime
16
+
17
+
18
def main() -> None:
    """Entry point: configure, sync, and record the run under session logging."""
    print('Starting Nefino GeoSync...')
    start_session_logging()
    start_time = datetime.now(tz=UTC)
    # Safety net: stop logging on interpreter exit as well.
    atexit.register(stop_session_logging)

    try:
        cli_args = parse_args()

        if cli_args.configure:
            config = Config.singleton()
            # The config singleton already prompts on a true first run; only
            # re-prompt when the user asked to edit an existing configuration.
            if not config.already_prompted:
                config.run_config_prompts()

        api_host = os.getenv('NEFINO_API_HOST', default='https://api.nefino.li')
        client = get_client(api_host=api_host)

        # Surface layer changes since the previous run before doing anything else.
        changelog = record_layer_changes_since_last_run(client)

        if cli_args.resume:
            download_completed_analyses(client)
        else:
            start_analyses(client, changelog)

        # Record successful completion
        record_successful_geosync_completion(start_time)

    except Exception as e:
        print(f'Fatal error: {e}')
        raise
    finally:
        # Also registered via atexit; calling here ends logging promptly on errors.
        stop_session_logging()


if __name__ == '__main__':
    main()