nefino-geosync 0.2.1-py3-none-any.whl → 0.2.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nefino-geosync might be problematic; consult the registry's advisory page for this release for more details.

nefino_geosync/journal.py CHANGED
@@ -1,9 +1,10 @@
1
1
  import json
2
2
  import os
3
3
  import re
4
- from datetime import datetime
5
- from typing import Dict, Set
6
4
  from .storage import get_app_directory
5
+ from datetime import datetime, timezone
6
+ from typing import Dict, Set
7
+
7
8
 
8
9
  class Journal:
9
10
  """Handles metadata about analyses for efficient downloading."""
@@ -18,92 +19,126 @@ class Journal:
18
19
  cls._instance = Journal()
19
20
  return cls._instance
20
21
 
21
- def __init__(self):
22
+ def __init__(self) -> None:
22
23
  if Journal._instance:
23
- raise Exception("Journal is a singleton class. Use Journal.singleton() to get the instance.")
24
+ raise Exception('Journal is a singleton class. Use Journal.singleton() to get the instance.')
24
25
  # Mapping from analysis pk to the state where the analysis was started
25
26
  self.analysis_states: Dict[str, str] = dict()
26
27
 
28
+ # Mapping from analysis pk to the layers that were requested
29
+ self.analysis_requested_layers: Dict[str, Set[str]] = dict()
30
+
27
31
  # Mapping from layer name to where it was last updated and when
28
32
  self.layer_last_updates: Dict[str, Dict[str, datetime]] = dict()
29
33
 
30
34
  # Record which analyses have been successfully started
31
35
  self.synced_analyses: Set[str] = set()
32
36
 
37
+ # Track the last successful geosync run for changelog queries
38
+ self.last_geosync_run: datetime = None
39
+
33
40
  self.load_analysis_states()
34
41
  self.load_layer_last_updates()
35
42
  self.load_synced_analyses()
43
+ self.load_last_geosync_run()
36
44
 
37
- def save_analysis_states(self):
45
+ def save_analysis_states(self) -> None:
38
46
  """Saves the analysis states to a file."""
39
- with open(os.path.join(get_app_directory(), "analysis_states.json"), "w") as f:
47
+ with open(os.path.join(get_app_directory(), 'analysis_states.json'), 'w') as f:
40
48
  json.dump(self.analysis_states, f)
41
-
42
- def load_analysis_states(self):
49
+
50
+ def load_analysis_states(self) -> None:
43
51
  """Loads the analysis states from a file."""
44
52
  try:
45
- with open(os.path.join(get_app_directory(), "analysis_states.json"), "r") as f:
53
+ with open(os.path.join(get_app_directory(), 'analysis_states.json'), 'r') as f:
46
54
  self.analysis_states = json.load(f)
47
55
  except FileNotFoundError:
48
56
  # we already have an empty dictionary as the field value
49
- print("No saved analysis states found.")
50
-
51
- def save_layer_last_updates(self):
57
+ print('No saved analysis states found.')
58
+
59
+ def save_layer_last_updates(self) -> None:
52
60
  """Saves the layer last updates to a file."""
53
- with open(os.path.join(get_app_directory(), "layer_last_updates.json"), "w") as f:
61
+ with open(os.path.join(get_app_directory(), 'layer_last_updates.json'), 'w') as f:
54
62
  json.dump(self.layer_last_updates, f, default=lambda x: x.isoformat())
55
63
 
56
- def load_layer_last_updates(self):
64
+ def load_layer_last_updates(self) -> None:
57
65
  """Loads the layer last updates from a file."""
58
66
  try:
59
- with open(os.path.join(get_app_directory(), "layer_last_updates.json"), "r") as f:
67
+ with open(os.path.join(get_app_directory(), 'layer_last_updates.json'), 'r') as f:
60
68
  self.layer_last_updates = json.load(f)
61
69
  for cluster in self.layer_last_updates.values():
62
70
  for state, timestamp in cluster.items():
63
71
  cluster[state] = datetime.fromisoformat(timestamp) if timestamp else None
64
72
  except FileNotFoundError:
65
73
  # we already have an empty dictionary as the field value
66
- print("No saved layer last updates found.")
67
-
68
- def save_synced_analyses(self):
74
+ print('No saved layer last updates found.')
75
+
76
+ def save_synced_analyses(self) -> None:
69
77
  """Saves the list of processed analyses to a file."""
70
- with open(os.path.join(get_app_directory(), "synced_analyses.json"), "w") as f:
78
+ with open(os.path.join(get_app_directory(), 'synced_analyses.json'), 'w') as f:
71
79
  json.dump(list(self.synced_analyses), f)
72
-
73
- def load_synced_analyses(self):
80
+
81
+ def load_synced_analyses(self) -> None:
74
82
  """Loads the list of processed analyses from a file."""
75
83
  try:
76
- with open(os.path.join(get_app_directory(), "synced_analyses.json"), "r") as f:
84
+ with open(os.path.join(get_app_directory(), 'synced_analyses.json'), 'r') as f:
77
85
  self.synced_analyses = set(json.load(f))
78
86
  except FileNotFoundError:
79
87
  # we already have an empty set as the field value
80
- print("No saved downloaded analyses found.")
88
+ print('No saved downloaded analyses found.')
89
+
90
+ def save_last_geosync_run(self) -> None:
91
+ """Saves the timestamp of the last successful geosync run."""
92
+ with open(os.path.join(get_app_directory(), 'last_geosync_run.json'), 'w') as f:
93
+ json.dump(self.last_geosync_run.isoformat() if self.last_geosync_run else None, f)
94
+
95
+ def load_last_geosync_run(self) -> None:
96
+ """Loads the timestamp of the last successful geosync run."""
97
+ try:
98
+ with open(os.path.join(get_app_directory(), 'last_geosync_run.json'), 'r') as f:
99
+ timestamp_str = json.load(f)
100
+ self.last_geosync_run = datetime.fromisoformat(timestamp_str) if timestamp_str else None
101
+ except FileNotFoundError:
102
+ # we already have None as the field value
103
+ print('No saved last geosync run timestamp found.')
104
+
105
+ def record_successful_geosync_run(self, start_time: datetime) -> None:
106
+ """Records the current time as the last successful geosync run."""
107
+ self.last_geosync_run = start_time
108
+ self.save_last_geosync_run()
81
109
 
82
- def record_analyses_requested(self, start_analyses_result):
83
- """Records the analyses that have been started, and where they were started."""
84
- pattern = r"^startAnalysis_(?P<state>DE[1-9A-G])$"
85
- for alias, analysis_metadata in start_analyses_result.__dict__.items():
110
+ def record_analyses_requested(self, start_analyses_result, analysis_inputs) -> None:
111
+ """Records the analyses that have been started, where they were started, and which layers were requested."""
112
+ pattern = r'^startAnalysis_(?P<state>DE[1-9A-G])$'
113
+ for alias, analysis_metadata in start_analyses_result.__dict__.items():
86
114
  match = re.match(pattern, alias)
87
115
  if not match:
88
116
  continue
89
- state = match.group("state")
117
+ state = match.group('state')
90
118
  # record where the analysis was started
91
119
  self.analysis_states[analysis_metadata.pk] = state
120
+ # record which layers were requested
121
+ requested_layers = set()
122
+ for request in analysis_inputs[state].specs.requests:
123
+ for layer in request.layers:
124
+ requested_layers.add(layer.layer_name)
125
+ self.analysis_requested_layers[analysis_metadata.pk] = requested_layers
92
126
  self.save_analysis_states()
93
-
94
- def record_layers_unpacked(self, layers: Set[str], state: str, started_at: datetime):
127
+
128
+ def record_layers_unpacked(self, layers: Set[str], state: str, started_at: datetime) -> None:
95
129
  """Records the layers that have been unpacked, and when they were last updated."""
96
- print(f"Recording layers {layers} as unpacked for state {state}")
130
+ print(f'Recording layers {layers} as unpacked for state {state}')
131
+
97
132
  for layer in layers:
98
133
  if layer not in self.layer_last_updates:
99
134
  self.layer_last_updates[layer] = dict()
100
135
  self.layer_last_updates[layer][state] = started_at
101
136
  self.save_layer_last_updates()
102
-
137
+
103
138
  def get_state_for_analysis(self, pk: str) -> str:
104
139
  """Returns the state where the analysis was started."""
105
140
  return self.analysis_states[pk]
106
-
141
+
107
142
  def is_newer_than_saved(self, layer: str, state: str, timestamp: datetime) -> bool:
108
143
  """Checks if the layer needs to be unpacked."""
109
144
  if layer not in self.layer_last_updates:
@@ -112,9 +147,20 @@ class Journal:
112
147
  return True
113
148
  if not self.layer_last_updates[layer][state]:
114
149
  return True
115
- return self.layer_last_updates[layer][state] < timestamp
116
-
117
- def record_analysis_synced(self, pk: str):
150
+
151
+ saved_timestamp = self.layer_last_updates[layer][state]
152
+
153
+ # Handle timezone comparison issues by making both timezone-aware
154
+ if saved_timestamp.tzinfo is None and timestamp.tzinfo is not None:
155
+ # Assume saved timestamp is UTC if it has no timezone info
156
+ saved_timestamp = saved_timestamp.replace(tzinfo=timezone.utc)
157
+ elif saved_timestamp.tzinfo is not None and timestamp.tzinfo is None:
158
+ # Make the API timestamp timezone-aware (assume UTC)
159
+ timestamp = timestamp.replace(tzinfo=timezone.utc)
160
+
161
+ return saved_timestamp < timestamp
162
+
163
+ def record_analysis_synced(self, pk: str) -> None:
118
164
  """Records that the analysis has been downloaded and unpacked."""
119
165
  self.synced_analyses.add(pk)
120
166
  self.save_synced_analyses()
@@ -0,0 +1,240 @@
1
+ """Module for querying and logging layer changelog information."""
2
+
3
+ import csv
4
+ import os
5
+ from .api_client import general_availability_operation, layer_changelog_operation
6
+ from .config import Config
7
+ from .graphql_errors import check_errors
8
+ from .journal import Journal
9
+ from .schema import LayerChangelogEntry
10
+ from datetime import datetime, timezone
11
+ from nefino_geosync.access_rule_filter import AccessRuleFilter
12
+ from sgqlc.endpoint.http import HTTPEndpoint
13
+ from typing import Any
14
+
15
+ LayerChangelogResult = Any
16
+
17
+
18
+ def query_layer_changelog(client: HTTPEndpoint, timestamp_start: str = None) -> LayerChangelogResult:
19
+ """Queries the layer changelog from the GraphQL API."""
20
+ changelog_op = layer_changelog_operation(timestamp_start)
21
+ changelog_data = client(changelog_op)
22
+ check_errors(changelog_data, 'Failed to fetch layer changelog')
23
+ return changelog_op + changelog_data
24
+
25
+
26
+ def record_layer_changes_since_last_run(client: HTTPEndpoint) -> LayerChangelogResult:
27
+ """Logs all layer changes since the last successful geosync run and returns the changelog data."""
28
+ journal = Journal.singleton()
29
+
30
+ # Get the timestamp of the last successful run
31
+ timestamp_start = None
32
+ if journal.last_geosync_run:
33
+ # Format timestamp as required: 2025-09-19T10:30:20.383210+00:00
34
+ if journal.last_geosync_run.tzinfo is None:
35
+ # If timezone-naive, assume UTC
36
+
37
+ aware_timestamp = journal.last_geosync_run.replace(tzinfo=timezone.utc)
38
+ else:
39
+ aware_timestamp = journal.last_geosync_run
40
+
41
+ timestamp_start = aware_timestamp.strftime('%Y-%m-%dT%H:%M:%S.%f%z')
42
+ # Ensure the timezone format includes the colon (e.g., +00:00 not +0000)
43
+ if len(timestamp_start) >= 4 and timestamp_start[-4:].isdigit():
44
+ timestamp_start = timestamp_start[:-2] + ':' + timestamp_start[-2:]
45
+
46
+ print(f'Checking for layer changes since last geosync run: {timestamp_start}')
47
+ else:
48
+ print('No previous geosync run found, skipping changelog check')
49
+ return None
50
+
51
+ # Get available clusters to filter changelog results
52
+ try:
53
+ # First get general availability to determine accessible clusters
54
+ general_op = general_availability_operation()
55
+ general_data = client(general_op)
56
+ check_errors(general_data, 'Failed to fetch general availability for changelog filtering')
57
+ general_availability = general_op + general_data
58
+
59
+ # Use AccessRuleFilter to determine accessible clusters
60
+ rules = AccessRuleFilter(general_availability.access_rules)
61
+
62
+ # Get all places from access rules to check against
63
+ all_places = set()
64
+ for rule in general_availability.access_rules:
65
+ all_places.update(rule.places)
66
+
67
+ accessible_clusters = {
68
+ cluster.name
69
+ for cluster in general_availability.clusters
70
+ if cluster.has_access and any(rules.check(place, cluster.name) for place in all_places)
71
+ }
72
+ print(f'Accessible clusters: {accessible_clusters}')
73
+
74
+ except Exception as e:
75
+ print(f'Failed to fetch accessible clusters, showing all changelog entries: {e}')
76
+ accessible_clusters = set() # Empty set means show all
77
+
78
+ # Query the changelog
79
+ try:
80
+ changelog_result = query_layer_changelog(client, timestamp_start)
81
+ log_changelog_entries(changelog_result, accessible_clusters)
82
+ return changelog_result
83
+ except Exception as e:
84
+ print(f'Failed to retrieve layer changelog: {e}')
85
+ return None
86
+
87
+
88
+ def log_changelog_entries(changelog_result: LayerChangelogResult, accessible_clusters: set = None) -> None:
89
+ """Logs changelog entries, focusing on relevant changes and filtering by accessible clusters."""
90
+ if not hasattr(changelog_result, 'layer_changelog'):
91
+ print('No layer changelog data received')
92
+ return
93
+
94
+ changelog_entries = changelog_result.layer_changelog
95
+ if not changelog_entries:
96
+ print('✅ No layer changes detected for accessible clusters since last run')
97
+ return
98
+
99
+ # Filter entries by accessible clusters and collect those with relevant changes
100
+ relevant_entries = []
101
+ for entry in changelog_entries:
102
+ # Skip if not in accessible clusters
103
+ cluster_name = getattr(entry, 'cluster_name', None)
104
+ if accessible_clusters and (not cluster_name or cluster_name not in accessible_clusters):
105
+ continue
106
+
107
+ # Check for relevant changes
108
+ relevant_changes = _get_relevant_changes(entry)
109
+ if relevant_changes:
110
+ relevant_entries.append((entry, relevant_changes))
111
+
112
+ # If no relevant changes found, show success message
113
+ if not relevant_entries:
114
+ print('✅ No layer changes detected for accessible clusters since last run')
115
+ return
116
+
117
+ print(f'📋 Found {len(relevant_entries)} layer change(s) for accessible clusters since last run:')
118
+
119
+ for entry, relevant_changes in relevant_entries:
120
+ _log_entry_details(entry, relevant_changes)
121
+
122
+ # Save to CSV (use original filtered entries for CSV)
123
+ filtered_entries = [entry for entry, _ in relevant_entries]
124
+ save_changelog_to_csv(filtered_entries)
125
+
126
+
127
+ def _get_relevant_changes(entry: LayerChangelogEntry) -> list:
128
+ """Extract relevant changes from a changelog entry."""
129
+ relevant_changes = []
130
+ if hasattr(entry, 'changed_fields') and entry.changed_fields:
131
+ for field in entry.changed_fields:
132
+ if field in ['attributes', 'layer_name', 'cluster_name']:
133
+ relevant_changes.append(field)
134
+ return relevant_changes
135
+
136
+
137
+ def _log_entry_details(entry: LayerChangelogEntry, relevant_changes: list) -> None:
138
+ """Log details for a single changelog entry."""
139
+ layer_name = getattr(entry, 'layer_name', 'Unknown')
140
+ cluster_name = getattr(entry, 'cluster_name', 'Unknown')
141
+ action = getattr(entry, 'action', 'Unknown')
142
+ timestamp = getattr(entry, 'timestamp', 'Unknown')
143
+
144
+ print(f" 📦 Layer '{layer_name}' (cluster: {cluster_name})")
145
+ print(f' Action: {action}')
146
+ print(f' Changed fields: {", ".join(relevant_changes)}')
147
+ print(f' Timestamp: {timestamp}')
148
+
149
+ # If attributes changed, log the attributes
150
+ if 'attributes' in relevant_changes and hasattr(entry, 'attributes') and entry.attributes:
151
+ print(f' New attributes: {", ".join(entry.attributes)}')
152
+
153
+ print('') # Empty line for readability
154
+
155
+
156
+ def save_changelog_to_csv(filtered_entries: list) -> None:
157
+ """Saves changelog entries to a CSV file in the output directory."""
158
+ if not filtered_entries:
159
+ return
160
+
161
+ config = Config.singleton()
162
+ timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
163
+ csv_filename = f'layer_changelog_{timestamp}.csv'
164
+ csv_path = os.path.join(config.output_path, csv_filename)
165
+
166
+ # Ensure output directory exists
167
+ os.makedirs(config.output_path, exist_ok=True)
168
+
169
+ # Define CSV headers
170
+ headers = ['timestamp', 'layer_name', 'cluster_name', 'action', 'changed_fields', 'attributes']
171
+
172
+ try:
173
+ with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
174
+ writer = csv.DictWriter(csvfile, fieldnames=headers)
175
+ writer.writeheader()
176
+
177
+ for entry in filtered_entries:
178
+ # Filter for relevant changes
179
+ relevant_changes = []
180
+ if hasattr(entry, 'changed_fields') and entry.changed_fields:
181
+ for field in entry.changed_fields:
182
+ if field in ['attributes', 'layer_name', 'cluster_name']:
183
+ relevant_changes.append(field)
184
+
185
+ if not relevant_changes:
186
+ continue # Skip entries without relevant changes
187
+
188
+ # Extract entry data
189
+ layer_name = getattr(entry, 'layer_name', 'Unknown')
190
+ cluster_name = getattr(entry, 'cluster_name', 'Unknown')
191
+ action = getattr(entry, 'action', 'Unknown')
192
+ timestamp_str = getattr(entry, 'timestamp', 'Unknown')
193
+ attributes = ''
194
+ if hasattr(entry, 'attributes') and entry.attributes:
195
+ attributes = ', '.join(entry.attributes)
196
+
197
+ writer.writerow(
198
+ {
199
+ 'timestamp': timestamp_str,
200
+ 'layer_name': layer_name,
201
+ 'cluster_name': cluster_name,
202
+ 'action': action,
203
+ 'changed_fields': ', '.join(relevant_changes),
204
+ 'attributes': attributes,
205
+ }
206
+ )
207
+
208
+ print(f'📊 Changelog saved to CSV: {csv_path}')
209
+ except Exception as e:
210
+ print(f'⚠️ Failed to save changelog to CSV: {e}')
211
+
212
+
213
+ def record_successful_geosync_completion(start_time: datetime) -> None:
214
+ """Records that a geosync run completed successfully."""
215
+ journal = Journal.singleton()
216
+ journal.record_successful_geosync_run(start_time)
217
+ print('✅ Geosync completed successfully')
218
+
219
+
220
+ def layer_has_relevant_changes_in_changelog(
221
+ changelog_result: LayerChangelogResult, layer_name: str, cluster_name: str
222
+ ) -> bool:
223
+ """Check if a layer has relevant changes in the provided changelog data."""
224
+ if not changelog_result or not hasattr(changelog_result, 'layer_changelog'):
225
+ return False
226
+
227
+ if not changelog_result.layer_changelog:
228
+ return False
229
+
230
+ # Check if this specific layer has relevant changes
231
+ for entry in changelog_result.layer_changelog:
232
+ entry_layer_name = getattr(entry, 'layer_name', None)
233
+ entry_cluster_name = getattr(entry, 'cluster_name', None)
234
+
235
+ if entry_layer_name == layer_name and entry_cluster_name == cluster_name:
236
+ relevant_changes = _get_relevant_changes(entry)
237
+ if relevant_changes:
238
+ return True
239
+
240
+ return False
nefino_geosync/run.py CHANGED
@@ -1,28 +1,59 @@
1
1
  """This is the main entry point of the application."""
2
+
3
+ import atexit
2
4
  import os
3
5
  from .api_client import get_client
4
- from .start_analyses import start_analyses
5
- from .download_completed_analyses import download_completed_analyses
6
6
  from .config import Config
7
+ from .download_completed_analyses import download_completed_analyses
8
+ from .layer_changelog import (
9
+ record_layer_changes_since_last_run,
10
+ record_successful_geosync_completion,
11
+ )
7
12
  from .parse_args import parse_args
13
+ from .session_logger import start_session_logging, stop_session_logging
14
+ from .start_analyses import start_analyses
15
+ from datetime import UTC, datetime
16
+
17
+
18
+ def main() -> None:
19
+ # Start session-wide logging
20
+ print('Starting Nefino GeoSync...')
21
+ start_session_logging()
22
+ start_time = datetime.now(tz=UTC)
23
+ # Ensure logging stops when the program exits
24
+ atexit.register(stop_session_logging)
25
+
26
+ try:
27
+ args = parse_args()
28
+
29
+ if args.configure:
30
+ config = Config.singleton()
31
+ # if you are running with --configure on the first run (you don't need to)
32
+ # you will be prompted to configure the app by the config singleton init.
33
+ # In that case, don't prompt the user again.
34
+ if not config.already_prompted:
35
+ config.run_config_prompts()
36
+
37
+ client = get_client(api_host=os.getenv('NEFINO_API_HOST', default='https://api.nefino.li'))
38
+
39
+ # Check for layer changes since last run before starting new analyses
40
+ changelog_result = record_layer_changes_since_last_run(client)
8
41
 
9
- def main():
10
- args = parse_args()
42
+ if not args.resume:
43
+ start_analyses(client, changelog_result)
44
+ else:
45
+ download_completed_analyses(client)
11
46
 
12
- if args.configure:
13
- config = Config.singleton()
14
- # if you are running with --configure on the first run (you don't need to)
15
- # you will be prompted to configure the app by the config singleton init.
16
- # In that case, don't prompt the user again.
17
- if not config.already_prompted:
18
- config.run_config_prompts()
47
+ # Record successful completion
48
+ record_successful_geosync_completion(start_time)
19
49
 
20
- client = get_client(api_host=os.getenv("NEFINO_API_HOST", default="https://api.nefino.li"))
50
+ except Exception as e:
51
+ print(f'Fatal error: {e}')
52
+ raise
53
+ finally:
54
+ # Ensure logging stops even if there's an error
55
+ stop_session_logging()
21
56
 
22
- if not args.resume:
23
- start_analyses(client)
24
- else:
25
- download_completed_analyses(client)
26
57
 
27
- if __name__ == "__main__":
28
- main()
58
+ if __name__ == '__main__':
59
+ main()