PyPI - nefino-geosync - Versions diffs - 0.2.3__py3-none-any.whl - Mend

nefino-geosync 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

nefino_geosync/__init__.py +0 -0
nefino_geosync/access_rule_filter.py +11 -0
nefino_geosync/api_client.py +119 -0
nefino_geosync/compose_requests.py +134 -0
nefino_geosync/config.py +82 -0
nefino_geosync/download_analysis.py +131 -0
nefino_geosync/download_completed_analyses.py +21 -0
nefino_geosync/get_downloadable_analyses.py +48 -0
nefino_geosync/graphql_errors.py +73 -0
nefino_geosync/journal.py +199 -0
nefino_geosync/layer_changelog.py +240 -0
nefino_geosync/parse_args.py +15 -0
nefino_geosync/run.py +59 -0
nefino_geosync/schema.json +2262 -0
nefino_geosync/schema.py +356 -0
nefino_geosync/session_logger.py +112 -0
nefino_geosync/start_analyses.py +57 -0
nefino_geosync/storage.py +40 -0
nefino_geosync-0.2.3.dist-info/METADATA +271 -0
nefino_geosync-0.2.3.dist-info/RECORD +23 -0
nefino_geosync-0.2.3.dist-info/WHEEL +4 -0
nefino_geosync-0.2.3.dist-info/entry_points.txt +2 -0
nefino_geosync-0.2.3.dist-info/licenses/LICENSE +201 -0

nefino_geosync/__init__.py ADDED Viewed

File without changes

nefino_geosync/access_rule_filter.py ADDED Viewed

@@ -0,0 +1,11 @@
+class AccessRuleFilter:
+    """Answers whether a cluster may be used for a place under a list of access rules."""
+    def __init__(self, access_rules):
+        # Each rule is read for its `places`, `all_clusters_enabled` and `clusters` attributes.
+        self.access_rules = access_rules
+    def check(self, place, cluster):
+        """Return True when at least one rule covering `place` grants `cluster`, else False."""
+        return any(
+            rule.all_clusters_enabled or cluster in rule.clusters
+            for rule in self.access_rules
+            if place in rule.places
+        )

nefino_geosync/api_client.py ADDED Viewed

@@ -0,0 +1,119 @@
+"""This module handles the API client for the Nefino API.
+If you want to use the Nefino API for something other than fetching the latest geodata,
+you can use this client to interact with the API directly.
+"""
+from .config import Config
+from .schema import GeoAnalysisInput, PlaceTypeGeo, schema
+from sgqlc.endpoint.http import HTTPEndpoint
+from sgqlc.operation import Operation
+from typing import Any, Dict, List
+def get_client(api_host: str = 'https://api.nefino.li') -> HTTPEndpoint:
+    """Build an HTTP endpoint for the `/external` route of the given API host.
+    The API key from the user's config is sent as the `Authorization` header.
+    """
+    auth_headers = {'Authorization': Config.singleton().api_key}
+    endpoint_url = f'{api_host}/external'
+    return HTTPEndpoint(endpoint_url, auth_headers)
+def general_availability_operation() -> Operation:
+    """Build the query for allowed analysis areas, access rules, and clusters with their layers."""
+    query = Operation(schema.Query)
+    # Areas the account may analyse.
+    areas = query.allowed_analysis_areas()
+    areas.all_areas_enabled()
+    areas.enabled_states().place_id()
+    # Rules describing which clusters are enabled for which places.
+    rules = query.access_rules()
+    for rule_field in ('all_clusters_enabled', 'clusters', 'places'):
+        getattr(rules, rule_field)()
+    # Every cluster, plus the metadata of each of its layers.
+    cluster_selection = query.clusters()
+    cluster_selection.name()
+    cluster_selection.has_access()
+    layer_selection = cluster_selection.layers()
+    for layer_field in ('name', 'last_update', 'is_regional', 'pre_buffer'):
+        getattr(layer_selection, layer_field)()
+    return query
+# `Any` is the most specific type we can write for the results of the availability queries;
+# this is a limitation of the sgqlc types.
+# GitHub issue: https://github.com/profusion/sgqlc/issues/129
+# The two aliases below exist only to make signatures self-describing.
+GeneralAvailabilityResult = Any
+LocalAvailabilityResult = Any
+def local_availability_operation(
+    availability_result: GeneralAvailabilityResult,
+) -> Operation:
+    """Build one `regional_layers` query per state listed by `build_states_list`.
+    Each copy of the field is aliased `regionalLayers_<state>` and selects the layer name and
+    last update.
+    """
+    query = Operation(schema.Query)
+    state_ids = build_states_list(availability_result)
+    for state_id in state_ids:
+        # Requesting the same field several times with different arguments
+        # requires a unique alias for every copy.
+        layer_selection = query.regional_layers(
+            __alias__=f'regionalLayers_{state_id}',
+            place_id=state_id,
+            place_type=PlaceTypeGeo('FEDERAL_STATE_GEO'),
+        )
+        for field_name in ('name', 'last_update'):
+            getattr(layer_selection, field_name)()
+    return query
+def build_states_list(availability_result: GeneralAvailabilityResult) -> List[str]:
+    """Return the place ids of the states the availability result allows analysing.
+    Yields an empty list when no analysis areas are reported, all sixteen state ids when every
+    area is enabled, and otherwise the ids of the explicitly enabled states.
+    """
+    areas = availability_result.allowed_analysis_areas
+    if areas is None:
+        return []
+    if not areas.all_areas_enabled:
+        return [enabled.place_id for enabled in areas.enabled_states]
+    # DE1 to DEG are the place_ids for the German states (EU scheme)
+    return ['DE' + suffix for suffix in '123456789ABCDEFG']
+def start_analyses_operation(inputs: Dict[str, GeoAnalysisInput]) -> Operation:
+    """Build a mutation holding one aliased `start_analysis` call per entry of `inputs`.
+    The dictionary key becomes part of the alias (`startAnalysis_<key>`); pk, status and url
+    are selected on every started analysis.
+    """
+    mutation = Operation(schema.Mutation)
+    for state_key, analysis_input in inputs.items():
+        started = mutation.start_analysis(inputs=analysis_input, __alias__=f'startAnalysis_{state_key}')
+        for field_name in ('pk', 'status', 'url'):
+            getattr(started, field_name)()
+    return mutation
+def get_analyses_operation() -> Operation:
+    """Build the query that lists analyses with their pk, status, url and start time."""
+    query = Operation(schema.Query)
+    metadata = query.analysis_metadata()
+    for field_name in ('pk', 'status', 'url', 'started_at'):
+        getattr(metadata, field_name)()
+    return query
+def layer_changelog_operation(timestamp_start: str = None) -> Operation:
+    """Build the query for layer changelog entries.
+    Args:
+        timestamp_start: When truthy, sent as `timestampStart` in the query input;
+            otherwise the input object is left empty.
+    """
+    query = Operation(schema.Query)
+    # Input object for the changelog query
+    changelog_input = {'timestampStart': timestamp_start} if timestamp_start else {}
+    entries = query.layer_changelog(inputs=changelog_input)
+    selected_fields = (
+        'layer_name',
+        'timestamp',
+        'action',
+        'changed_fields',
+        'attributes',
+        'layer_id',
+        'last_update',
+        'cluster_name',
+        'cluster_id',
+    )
+    for field_name in selected_fields:
+        getattr(entries, field_name)()
+    return query

nefino_geosync/compose_requests.py ADDED Viewed

@@ -0,0 +1,134 @@
+from .access_rule_filter import AccessRuleFilter
+from .api_client import (
+    GeneralAvailabilityResult,
+    LocalAvailabilityResult,
+    build_states_list,
+)
+from .config import Config
+from .journal import Journal
+from .layer_changelog import LayerChangelogResult, layer_has_relevant_changes_in_changelog
+from .parse_args import parse_args
+from .schema import (
+    CoordinateInput,
+    GeoAnalysisInput,
+    GeoAnalysisLayerInput,
+    GeoAnalysisObjectInput,
+    GeoAnalysisOutputFormatInput,
+    GeoAnalysisRequestInput,
+    GeoAnalysisScopeInput,
+    ScopeType,
+)
+from typing import Dict, List, Set
+# Place analyses require a dummy coordinate. It will be ignored in calculations.
+# The same instance is passed as `coordinate` to every analysis spec built in this module.
+DUMMY_COORDINATE = CoordinateInput(lon=9.0, lat=52.0)
+# The API requires input of combining operations, even if they are not used.
+# NOTE: this is a single module-level list passed as `operations` to every spec; nothing in
+# this module mutates it, and it should be treated as read-only.
+DUMMY_OPERATIONS = []
+def compose_complete_requests(
+    general_availability: GeneralAvailabilityResult,
+    local_availability: LocalAvailabilityResult,
+    changelog_result: LayerChangelogResult = None,
+) -> Dict[str, GeoAnalysisInput]:
+    """Build the analysis requests for every available state that has something to update.
+    Returns a mapping from state to its request. States for which `compose_single_request`
+    returns None are reported as up-to-date and left out of the mapping.
+    """
+    states = build_states_list(general_availability)
+    if not states:
+        print('⚠️ No federal states available for your account')
+        return {}
+    # Log the list of available federal states
+    print(f'📍 Checking {len(states)} available federal state(s): {", ".join(sorted(states))}')
+    # Compose all states first; the up-to-date notices are printed afterwards in one run.
+    composed = [
+        (state, compose_single_request(state, general_availability, local_availability, changelog_result))
+        for state in states
+    ]
+    pending = {}
+    for state, request in composed:
+        if request is None:
+            print(f'✅ {state} is up-to-date')
+            continue
+        pending[state] = request
+    return pending
+def compose_layer_inputs(
+    layers: list, local_layers: Set[str], state: str, cluster_name: str, changelog_result: LayerChangelogResult = None
+) -> List[GeoAnalysisLayerInput]:
+    """Select the layers of one cluster that need to be fetched again for `state`.
+    A layer is selected when it is available (not regional, or its name is in `local_layers`)
+    and either the journal reports it as newer than the saved copy or the changelog reports
+    relevant changes for it.
+    Args:
+        layers: Layer objects of the cluster; read for name, is_regional, last_update, pre_buffer.
+        local_layers: Names of the regional layers available in `state`.
+        state: Place id of the state being checked.
+        cluster_name: Name of the cluster the layers belong to (used for lookups and output).
+        changelog_result: Changelog data passed through to the changelog check; may be None.
+    Returns:
+        One GeoAnalysisLayerInput per selected layer, buffered with the layer's pre_buffer.
+    """
+    args = parse_args()
+    journal = Journal.singleton()
+    updated_layers = []
+    print(f'  🔍 Checking layers in cluster {cluster_name} for {state}...')
+    for layer in layers:
+        # Check if layer should be processed
+        is_available = (not layer.is_regional) or (layer.name in local_layers)
+        needs_update = journal.is_newer_than_saved(layer.name, state, layer.last_update)
+        has_relevant_changes = layer_has_relevant_changes_in_changelog(changelog_result, layer.name, cluster_name)
+        if is_available and (needs_update or has_relevant_changes):
+            updated_layers.append(layer)
+            if args.verbose:
+                # A newer timestamp takes precedence as the reported reason when both apply.
+                reason = 'last update' if needs_update else 'relevant changes'
+                print(f'    📄 {layer.name} needs update ({reason}: {layer.last_update})')
+    if updated_layers:
+        # Word order fixed: the count belongs to "layers", mirroring the up-to-date message below.
+        print(f'    ⚡ Found {len(updated_layers)} layers to update in cluster {cluster_name} for {state}')
+    else:
+        print(f'    ✅ All layers are up-to-date in cluster {cluster_name} for {state}')
+    return [GeoAnalysisLayerInput(layer_name=layer.name, buffer_m=[layer.pre_buffer]) for layer in updated_layers]
+def compose_single_request(
+    state: str,
+    general_availability: GeneralAvailabilityResult,
+    local_availability: LocalAvailabilityResult,
+    changelog_result: LayerChangelogResult = None,
+) -> GeoAnalysisInput:
+    """Build a single request for a given state.
+    Only clusters the account has access to, and that the access rules enable for `state`, are
+    considered. Layers named in the config's `skip_layers` are never requested.
+    Args:
+        state: Place id of the state to build the request for.
+        general_availability: Result holding access rules and clusters with their layers.
+        local_availability: Result indexed by `regionalLayers_<state>` with the regional layers.
+        changelog_result: Changelog data passed through to the layer selection; may be None.
+    Returns:
+        The analysis input named `sync_<state>`, or None when no cluster has layers to update.
+    """
+    print(f'🔍 Checking layers for {state}...')
+    config = Config.singleton()
+    rules = AccessRuleFilter(general_availability.access_rules)
+    skipped_layers = set(config.skip_layers)
+    # specify the data we want to add to the analysis
+    state_local_layers = {layer.name for layer in local_availability[f'regionalLayers_{state}']} - skipped_layers
+    requests_as_tuples = [
+        (
+            cluster,
+            compose_layer_inputs(
+                # Removing skipped names from `state_local_layers` alone only hides regional layers;
+                # non-regional layers count as available regardless, so filter the layer list too.
+                [layer for layer in cluster.layers if layer.name not in skipped_layers],
+                state_local_layers,
+                state,
+                cluster.name,
+                changelog_result,
+            ),
+        )
+        for cluster in general_availability.clusters
+        if cluster.has_access and rules.check(state, cluster.name)
+    ]
+    requests = [
+        GeoAnalysisRequestInput(cluster_name=cluster.name, layers=layers)
+        for (cluster, layers) in requests_as_tuples
+        if layers
+    ]
+    if not requests:
+        return None
+    # Specify the output format
+    # TODO: this should be configurable
+    output = GeoAnalysisOutputFormatInput(template_name='default', type=config.output_format, crs=config.crs)
+    # specify where the analysis should be done
+    scope = GeoAnalysisScopeInput(place=state, type=ScopeType('FEDERAL_STATE'))
+    # put everything together into a specification for an analysis
+    spec = GeoAnalysisObjectInput(
+        coordinate=DUMMY_COORDINATE,
+        output=output,
+        scope=scope,
+        requests=requests,
+        operations=DUMMY_OPERATIONS,
+    )
+    return GeoAnalysisInput(name=f'sync_{state}', specs=spec)

nefino_geosync/config.py ADDED Viewed

@@ -0,0 +1,82 @@
+import json
+import os
+from typing import List
+import questionary
+from .schema import CRSType, OutputObjectType
+from .storage import get_app_directory
+class Config:
+    """This class handles storing and retrieving user preferences.
+    Preferences are kept in `config.json` inside the app directory. When that file does not
+    exist, constructing the instance runs the interactive prompts and saves the answers.
+    """
+    # This is a singleton class. There should only be one instance of Config.
+    _instance = None
+    @classmethod
+    def singleton(cls):
+        """Returns the singleton instance of Config, creating it on first use."""
+        if cls._instance is None:
+            cls._instance = Config()
+        return cls._instance
+    @staticmethod
+    def _parse_skip_layers(raw):
+        """Turns the comma-separated prompt answer into a list of layer names.
+        Whitespace around each name is dropped and empty entries are ignored, so "a, b," gives
+        ['a', 'b']. None is treated like an empty answer.
+        """
+        return [name.strip() for name in (raw or "").split(",") if name.strip()]
+    @property
+    def _config_file_path(self) -> str:
+        """Returns the path to the config file."""
+        return os.path.join(get_app_directory(), "config.json")
+    def __init__(self):
+        if Config._instance:
+            raise Exception("Config is a singleton class. Use Config.singleton() to get the instance.")
+        # Set to True once the prompts have been run in this process.
+        self.already_prompted = False
+        if not os.path.exists(self._config_file_path):
+            self.run_config_prompts(missing_config=True)
+        else:
+            with open(self._config_file_path, "r") as f:
+                config = json.load(f)
+                self.output_path: str = config['output_path']
+                self.output_format: OutputObjectType = OutputObjectType(
+                    config['output_format'])
+                self.crs: CRSType = CRSType(config['crs'])
+                self.skip_layers: List[str] = config['skip_layers']
+                self.api_key: str = config['api_key']
+    def save(self):
+        """Saves the config to a file."""
+        with open(self._config_file_path, "w") as f:
+            json.dump({
+                'output_path': self.output_path,
+                'output_format': self.output_format,
+                'skip_layers': self.skip_layers,
+                'api_key': self.api_key,
+                'crs': self.crs
+            }, f)
+    def run_config_prompts(self, missing_config: bool = False):
+        """Runs the configuration wizard, then saves the answers.
+        Args:
+            missing_config: True when no config exists yet; built-in defaults are offered
+                instead of the currently stored values.
+        """
+        self.output_path = questionary.text(
+            "Where do you want to collect downloaded geodata files?",
+            default=os.path.join(get_app_directory(), "newestData") \
+                if missing_config else self.output_path).ask()
+        self.output_format = OutputObjectType(
+            questionary.select(
+                "What format do you want to use for the output files?",
+                instruction="Changing this value after first run will require wiping the downloaded data.",
+                choices=['GPKG', 'SHP'],
+                default='GPKG' if missing_config else self.output_format
+                ).ask())
+        self.crs = CRSType(
+            questionary.select(
+                "What coordinate reference system do you want to use?",
+                choices=[crs for crs in CRSType],
+                default='EPSG_4326' if missing_config else self.crs
+            ).ask())
+        self.api_key = questionary.text(
+            "Enter your API key:",
+            default="" if missing_config else self.api_key).ask()
+        skip_layer_string = questionary.text(
+            "Enter the names of any layers you want to skip downloading, separated by commas.",
+            instruction="Layer names can be found on https://docs.nefino.li/geo.",
+            default="" if missing_config else ",".join(self.skip_layers)).ask()
+        # Names are compared verbatim against layer names later, so stray spaces after the
+        # commas must not end up in the stored list.
+        self.skip_layers = self._parse_skip_layers(skip_layer_string)
+        self.save()
+        self.already_prompted = True

nefino_geosync/download_analysis.py ADDED Viewed

@@ -0,0 +1,131 @@
+import os
+import re
+import zipfile
+from .config import Config
+from .get_downloadable_analyses import AnalysisResult
+from .journal import Journal
+from .storage import get_download_directory
+from datetime import datetime
+from shutil import move, rmtree
+from urllib.request import urlretrieve
+def download_analysis(analysis: AnalysisResult) -> None:
+    """Fetch the archive of an analysis, extract it, unpack its layers and record it as synced."""
+    journal = Journal.singleton()
+    target_dir = get_download_directory(analysis.pk)
+    archive_path = os.path.join(target_dir, 'download.zip')
+    # An archive already present here is treated as the remains of a failed download.
+    if os.path.exists(archive_path):
+        os.remove(archive_path)
+    # Only spaces are percent-encoded before the URL is requested.
+    source_url = analysis.url.replace(' ', '%20')
+    urlretrieve(source_url, archive_path)
+    with zipfile.ZipFile(archive_path, 'r') as archive:
+        archive.extractall(target_dir)
+    unpack_items(get_zip_root(target_dir), analysis.pk, analysis.started_at)
+    journal.record_analysis_synced(analysis.pk)
+def get_zip_root(download_dir: str) -> str:
+    """Return the directory that holds the extracted archive contents.
+    This is the download directory itself; an earlier archive layout was heavily nested.
+    """
+    zip_root = download_dir
+    return zip_root
+# Splits a file name into three named groups:
+#   layer  - everything from the start of the name, matched lazily
+#   buffer - optional suffix of the form `__<digits>m`
+#   ext    - a dot followed by 3 or 4 characters at the very end of the name
+# Names without such an extension do not match at all.
+FILE_NAME_PATTERN = re.compile(r'(?P<layer>^.*?)(?P<buffer>__[0-9]+m)?(?P<ext>\..{3,4}$)')
+def _iter_cluster_dirs(base_path: str):
+    """Yield the path of every cluster folder in `base_path`, leaving out `analysis_area`."""
+    for entry in os.listdir(base_path):
+        entry_path = os.path.join(base_path, entry)
+        if entry != 'analysis_area' and os.path.isdir(entry_path):
+            yield entry_path
+def unpack_items(zip_root: str, pk: str, started_at: datetime) -> None:
+    """
+    Unpacks the layers from the zip file.
+    Files are moved into `<output_path>/<state>`, replacing existing files of the same layer and
+    extension. Afterwards the requested layers are recorded as updated in the journal and the
+    extracted tree is deleted. Does nothing when the analysis is unknown to the journal.
+    Args:
+        zip_root: Path to the root directory of the extracted zip
+        pk: Primary key of the analysis
+        started_at: Timestamp when the analysis started
+    """
+    journal = Journal.singleton()
+    config = Config.singleton()
+    if pk not in journal.analysis_states:
+        print(f'Analysis {pk} not found in journal; skipping download')
+        return
+    state = journal.get_state_for_analysis(pk)
+    base_path = get_base_path(zip_root)
+    output_dir = os.path.join(config.output_path, state)
+    # Layer names seen in the archive. Collected before anything is moved, because the cluster
+    # folders no longer contain the unpacked files once this loop has run.
+    layers_in_archive = set()
+    # Iterate through cluster folders inside the analysis subfolder
+    for cluster_dir in _iter_cluster_dirs(base_path):
+        for file in os.listdir(cluster_dir):
+            match = FILE_NAME_PATTERN.match(file)
+            if match is None:
+                # Without a recognisable extension the layer cannot be identified.
+                print(f'⚠️  Warning: Skipping {file}: file name does not look like a layer file.')
+                continue
+            layer, ext = (match.group('layer'), match.group('ext'))
+            layers_in_archive.add(layer)
+            # NOTE(review): the journal is queried with the file name here, while other callers
+            # pass the layer name - confirm which key the journal expects.
+            if not journal.is_newer_than_saved(file, state, started_at):
+                continue
+            os.makedirs(output_dir, exist_ok=True)
+            # Remove any existing files for the same layer
+            # this is important to avoid confusion if the pre-buffer changes
+            for existing_file in os.listdir(output_dir):
+                if not existing_file.startswith(layer):
+                    continue
+                existing_match = FILE_NAME_PATTERN.match(existing_file)
+                if existing_match is None:
+                    continue
+                # only remove files that match the layer and extension
+                # otherwise, only the last extension to be unpacked would survive
+                # also, we are double-checking the layer name here in case we have
+                # a layer name which starts with a different layer's name
+                if existing_match.group('layer') == layer and existing_match.group('ext') == ext:
+                    os.remove(os.path.join(output_dir, existing_file))
+            move(os.path.join(cluster_dir, file), output_dir)
+    # Update the journal to mark layers as updated. We might have empty layers so we do set all requested layers as
+    # updated.
+    if pk in journal.analysis_requested_layers:
+        layers_to_mark_updated = journal.analysis_requested_layers[pk]
+    else:
+        # Fallback: use the layer names found in the ZIP file structure as a safety net
+        print(f'⚠️  Warning: No recorded requested layers for analysis {pk}. Extracting from ZIP structure.')
+        layers_to_mark_updated = layers_in_archive
+    print(f'Recording {len(layers_to_mark_updated)} requested layers as updated for state {state}')
+    journal.record_layers_unpacked(layers_to_mark_updated, state, started_at)
+    rmtree(zip_root)
+def get_base_path(zip_root: str) -> str:
+    """
+    Returns the base path for the analysis files in the ZIP structure.
+    Handles two different ZIP structures:
+    - Old structure: analysis_summary.xlsx and cluster folders directly in ZIP root
+    - New structure: analysis_summary.xlsx and cluster folders inside a dedicated subfolder
+    The presence of analysis_summary.xlsx in the root directory is used to determine
+    which structure we're dealing with.
+    Args:
+        zip_root: Path to the root directory of the extracted ZIP file
+    Returns:
+        str: Path to the directory containing the cluster folders and analysis_summary.xlsx
+    Raises:
+        FileNotFoundError: If the root holds neither analysis_summary.xlsx nor any subfolder.
+    """
+    # Sorted so that the chosen subfolder does not depend on the order the OS lists entries in.
+    entries = sorted(os.listdir(zip_root))
+    if 'analysis_summary.xlsx' in entries:
+        # Old structure - use zip_root
+        return zip_root
+    # New structure - the analysis subfolder is expected to be the only directory in zip_root
+    for entry in entries:
+        candidate = os.path.join(zip_root, entry)
+        if os.path.isdir(candidate):
+            return candidate
+    # An explicit error instead of a bare StopIteration leaking out of next().
+    raise FileNotFoundError(f'No analysis_summary.xlsx or analysis subfolder found in {zip_root}')

nefino_geosync/download_completed_analyses.py ADDED Viewed

@@ -0,0 +1,21 @@
+from .download_analysis import download_analysis
+from .get_downloadable_analyses import get_downloadable_analyses
+from .journal import Journal
+from .parse_args import parse_args
+from sgqlc.endpoint.http import HTTPEndpoint
+def download_completed_analyses(client: HTTPEndpoint) -> None:
+    """Download every finished analysis that the journal knows about and has not synced yet.
+    Analyses missing from the journal's states are ignored silently; analyses without recorded
+    requested layers are skipped with a warning.
+    """
+    journal = Journal.singleton()
+    args = parse_args()
+    for analysis in get_downloadable_analyses(client):
+        pk = analysis.pk
+        if pk in journal.synced_analyses:
+            if args.verbose:
+                print(f'Analysis {analysis.pk} already downloaded')
+            continue
+        if pk not in journal.analysis_states:
+            continue
+        if pk not in journal.analysis_requested_layers:
+            print(f'⚠️  Warning: Analysis {analysis.pk} found but has no recorded requested layers. Skipping.')
+            continue
+        download_analysis(analysis)
+        print(f'Downloaded analysis {analysis.pk}')

nefino_geosync/get_downloadable_analyses.py ADDED Viewed

@@ -0,0 +1,48 @@
+from .api_client import get_analyses_operation
+from .graphql_errors import check_errors
+from .parse_args import parse_args
+from .schema import DateTime, Status
+from sgqlc.endpoint.http import HTTPEndpoint
+from time import sleep
+from typing import Generator, Protocol
+# Let's give a quick description of what we want to be fetching.
+# This does depend on what get_analyses_operation() actually does.
+class AnalysisResult(Protocol):
+    """Structural type of one analysis entry: its pk, status, download url and start time."""
+    pk: str
+    status: Status
+    url: str
+    started_at: DateTime
+def get_downloadable_analyses(
+    client: HTTPEndpoint,
+) -> Generator[AnalysisResult, None, None]:
+    """Yield each successful analysis once, polling until nothing is pending or running.
+    The analysis list is fetched repeatedly with a 10 second pause between rounds; the loop
+    ends after the first round in which no analysis is pending or running.
+    """
+    verbose = parse_args().verbose
+    query = get_analyses_operation()
+    already_yielded = set()
+    print('Checking for analyses to download...')
+    while True:
+        response = client(query)
+        check_errors(response, 'Failed to fetch analysis status')
+        parsed = query + response
+        still_waiting = False
+        for analysis in parsed.analysis_metadata:
+            status = analysis.status
+            if status == Status('PENDING') or status == Status('RUNNING'):
+                still_waiting = True
+                if verbose:
+                    print(f'Analysis {analysis.pk} is still pending or running.')
+            elif status == Status('SUCCESS') and analysis.pk not in already_yielded:
+                already_yielded.add(analysis.pk)
+                yield analysis
+        if not still_waiting:
+            return
+        if verbose:
+            print('Waiting for more analyses to finish...')
+        sleep(10)

nefino_geosync/graphql_errors.py ADDED Viewed

@@ -0,0 +1,73 @@
+import html
+import json
+import re
+import sys
+from .parse_args import parse_args
+from datetime import datetime
+from prompt_toolkit import print_formatted_text
+from prompt_toolkit.formatted_text import HTML
+def check_errors(data: dict, context: str = None) -> None:
+    """Check for errors in a GraphQL response and exit the process if there are any.
+    Does nothing when `data` has no 'errors' key. Otherwise the errors are reported - with the
+    whole response in verbose mode - and the process exits with status 1.
+    Args:
+        data: The response dictionary returned by the GraphQL endpoint.
+        context: Optional short description of the failed operation, shown with the error.
+    """
+    args = parse_args()
+    if 'errors' in data:
+        if args.verbose:
+            pp('<b>GraphQL operation with errors:</b> ' + html.escape(json.dumps(data, indent=4)))
+        if is_token_invalid(data):
+            pp(
+                '<b fg="red">ERROR:</b> Invalid token. Please run <b>nefino-geosync --configure</b> and double-check your API key.'
+            )
+        else:
+            if not args.verbose:
+                try:
+                    pp(
+                        '<b>Received GraphQL error from server:</b> '
+                        + html.escape(json.dumps(data['errors'], indent=4))
+                    )
+                except Exception as e:
+                    # Best effort: fall back to plain printing if the formatted output fails.
+                    print(e)
+                    print(data['errors'])
+            # Add context information if provided
+            if context:
+                # The context is inserted into markup, so '<', '>' and '&' have to be escaped
+                # like the JSON dumps above; otherwise such a character breaks the output.
+                pp(f'<b fg="red">Context:</b> {html.escape(context, quote=False)}')
+            if not args.verbose:
+                pp("""<b fg="red">ERROR:</b> A GraphQL error occurred. Run with <b>--verbose</b> to see more information.
+If this error persists, please contact Nefino support: https://www.nefino.de/kontakt
+Exiting due to the above error.""")
+            else:
+                pp('<b fg="red">ERROR:</b> A GraphQL error occurred.')
+                pp(
+                    '<b fg="red">If this error persists, please contact Nefino support: https://www.nefino.de/kontakt</b>'
+                )
+                pp('<b fg="red">Exiting due to the above error.</b>')
+        sys.exit(1)
+def pp(to_print: str) -> None:
+    """Print HTML-style markup to the console and mirror it as plain text to the session log.
+    Args:
+        to_print: Markup understood by prompt_toolkit's HTML; literal text in it must be escaped.
+    """
+    # Display formatted text in console
+    print_formatted_text(HTML(to_print))
+    # For logging: check if stdout has been replaced by TeeStream
+    # If so, write plain text directly to the log file to avoid duplication
+    if hasattr(sys.stdout, 'log_file'):
+        # Remove HTML tags for plain text logging, then turn entities such as &quot; and &lt;
+        # back into the characters they stand for so the log shows the original text.
+        plain_text = html.unescape(re.sub(r'<[^>]+>', '', to_print))
+        timestamp = datetime.now().strftime('%H:%M:%S')
+        sys.stdout.log_file.write(f'[{timestamp}] [STDOUT] {plain_text}\n')
+        sys.stdout.log_file.flush()
+def is_token_invalid(data: dict) -> bool:
+    """Check if the token is invalid.
+    Looks at the first entry of `data['errors']` and returns True when its
+    `extensions.nefino_type` equals 'AuthTokenInvalid'. Any response that lacks this structure
+    yields False.
+    """
+    try:
+        return data['errors'][0]['extensions']['nefino_type'] == 'AuthTokenInvalid'
+    except (KeyError, IndexError, TypeError):
+        # Missing keys, an empty error list, or a non-dict entry (e.g. `extensions: null`) all
+        # mean "not an auth error"; raising here would hide the error being reported.
+        return False