PyPI - ckanext-csvwmapandtransform - Versions diffs - 1.0.0__py3-none-any.whl - Mend

ckanext-csvwmapandtransform 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

ckanext/csvwmapandtransform/__init__.py +0 -0
ckanext/csvwmapandtransform/action.py +405 -0
ckanext/csvwmapandtransform/assets/script.js +81 -0
ckanext/csvwmapandtransform/assets/style.css +124 -0
ckanext/csvwmapandtransform/assets/webassets.yml +13 -0
ckanext/csvwmapandtransform/auth.py +23 -0
ckanext/csvwmapandtransform/cli.py +18 -0
ckanext/csvwmapandtransform/db.py +397 -0
ckanext/csvwmapandtransform/helpers.py +67 -0
ckanext/csvwmapandtransform/mapper.py +92 -0
ckanext/csvwmapandtransform/plugin.py +140 -0
ckanext/csvwmapandtransform/tasks.py +257 -0
ckanext/csvwmapandtransform/templates/csvwmapandtransform/create_mapping.html +56 -0
ckanext/csvwmapandtransform/templates/csvwmapandtransform/transform.html +108 -0
ckanext/csvwmapandtransform/templates/package/resource_read.html +8 -0
ckanext/csvwmapandtransform/templates/package/snippets/resource_item.html +23 -0
ckanext/csvwmapandtransform/tests/__init__.py +0 -0
ckanext/csvwmapandtransform/views.py +205 -0
ckanext_csvwmapandtransform-1.0.0-py3.13-nspkg.pth +1 -0
ckanext_csvwmapandtransform-1.0.0.dist-info/METADATA +121 -0
ckanext_csvwmapandtransform-1.0.0.dist-info/RECORD +26 -0
ckanext_csvwmapandtransform-1.0.0.dist-info/WHEEL +5 -0
ckanext_csvwmapandtransform-1.0.0.dist-info/entry_points.txt +2 -0
ckanext_csvwmapandtransform-1.0.0.dist-info/licenses/LICENSE +661 -0
ckanext_csvwmapandtransform-1.0.0.dist-info/namespace_packages.txt +1 -0
ckanext_csvwmapandtransform-1.0.0.dist-info/top_level.txt +1 -0

ckanext/csvwmapandtransform/db.py ADDED Viewed

@@ -0,0 +1,397 @@
+"""
+Abstracts a database. Used for storing the jobs, job metadata and job logs
+of csvwmapandtransform.
+Loosely based on ckan-service-provider's db.py
+"""
+import datetime
+import json
+import six
+import sqlalchemy
+from ckan.plugins import toolkit
+ENGINE = None  # SQLAlchemy engine; assigned by init()
+_METADATA = None  # sqlalchemy.MetaData the three tables are attached to; assigned by init()
+JOBS_TABLE = None  # "jobs" table object; assigned by init()
+METADATA_TABLE = None  # "metadata" table object; assigned by init()
+LOGS_TABLE = None  # "logs" table object; assigned by init()
+def init(db_uri: str = "", echo=False):
+    """Initialise the database.
+    Initialise the sqlalchemy engine, metadata and table objects that we use to
+    connect to the database.
+    Create the database and the database tables themselves if they don't
+    already exist.
+    :param uri: the sqlalchemy database URI
+    :type uri: string
+    :param echo: whether or not to have the sqlalchemy engine log all
+        statements to stdout
+    :type echo: bool
+    """
+    if not db_uri:
+        db_uri = toolkit.config.get("ckanext.csvwmapandtransform.db_url")
+    global ENGINE, _METADATA, JOBS_TABLE, METADATA_TABLE, LOGS_TABLE
+    ENGINE = sqlalchemy.create_engine(db_uri, echo=echo, convert_unicode=True)
+    _METADATA = sqlalchemy.MetaData(ENGINE)
+    JOBS_TABLE = _init_jobs_table()
+    METADATA_TABLE = _init_metadata_table()
+    LOGS_TABLE = _init_logs_table()
+    _METADATA.create_all(ENGINE)
+def drop_all():
+    """Delete all the database tables (if they exist).
+    This is for tests to reset the DB. Note that this will delete *all* tables
+    in the database, not just tables created by this module (for example
+    apscheduler's tables will also be deleted).
+    """
+    if _METADATA:
+        _METADATA.drop_all(ENGINE)
+def delete_job(job_id):
+    """Delete a job from the jobs table by job_id.
+    :param job_id: the job_id of the job to be deleted
+    :type job_id: unicode
+    """
+    if job_id:
+        job_id = six.text_type(job_id)
+    msg = ""
+    with ENGINE.connect() as conn:
+        trans = conn.begin()
+        try:
+            result = conn.execute(
+                JOBS_TABLE.delete().where(JOBS_TABLE.c.job_id == job_id)
+            )
+            if result.rowcount == 0:
+                msg = f"No job found with id: {job_id}"
+            else:
+                msg = f"Job with id: {job_id} has been deleted successfully."
+            trans.commit()
+        except Exception as e:
+            trans.rollback()
+            msg = f"An error occurred: {e}"
+    return msg
+def get_job(job_id):
+    """Return the job with the given job_id as a dict."""
+    if job_id:
+        job_id = six.text_type(job_id)
+    with ENGINE.connect() as conn:
+        result = conn.execute(
+            JOBS_TABLE.select().where(JOBS_TABLE.c.job_id == job_id)
+        ).first()
+    if not result:
+        return None
+    result_dict = {
+        field: (
+            value.isoformat()
+            if isinstance(value := getattr(result, field), datetime.datetime)
+            else value
+        )
+        for field in result.keys()
+    }
+    result_dict["metadata"] = _get_metadata(job_id)
+    result_dict["logs"] = _get_logs(job_id)
+    return result_dict
+def add_pending_job(job_id, job_type, data=None, metadata=None, result_url=None):
+    """Add a new job with status "pending" to the jobs table."""
+    if not data:
+        data = {}
+    data = six.text_type(json.dumps(data))
+    if job_id:
+        job_id = six.text_type(job_id)
+    if job_type:
+        job_type = six.text_type(job_type)
+    if result_url:
+        result_url = six.text_type(result_url)
+    if not metadata:
+        metadata = {}
+    with ENGINE.connect() as conn:
+        trans = conn.begin()
+        try:
+            conn.execute(
+                JOBS_TABLE.insert().values(
+                    job_id=job_id,
+                    job_type=job_type,
+                    status="pending",
+                    requested_timestamp=datetime.datetime.utcnow(),
+                    sent_data=data,
+                    result_url=result_url,
+                )
+            )
+            inserts = [
+                {
+                    "job_id": job_id,
+                    "key": six.text_type(key),
+                    "value": six.text_type(
+                        json.dumps(value)
+                        if not isinstance(value, six.string_types)
+                        else value
+                    ),
+                    "type": (
+                        "json" if not isinstance(value, six.string_types) else "string"
+                    ),
+                }
+                for key, value in metadata.items()
+            ]
+            if inserts:
+                conn.execute(METADATA_TABLE.insert(), inserts)
+            trans.commit()
+        except Exception:
+            trans.rollback()
+            raise
+class InvalidErrorObjectError(Exception):
+    pass
+def _validate_error(error):
+    """Check the given error object and return it in normalised form.
+    Accepted inputs and their results:
+    - None: returned as None
+    - a string: wrapped as {"message": error_string}
+    - a dict whose "message" value is a string: returned unchanged (it may
+      carry any other keys as well)
+    :param error: the error object to validate
+    :raises InvalidErrorObjectError: If the error object doesn't match any of
+        the allowed types
+    """
+    if error is None:
+        return None
+    if isinstance(error, six.string_types):
+        return {"message": error}
+    try:
+        message = error["message"]
+    except (TypeError, KeyError):
+        raise InvalidErrorObjectError(
+            "error must be either a string or a dict with a message key"
+        )
+    if not isinstance(message, six.string_types):
+        raise InvalidErrorObjectError("error['message'] must be a string")
+    return error
+def _update_job(job_id, job_dict):
+    """Update the database row for the given job_id with the given job_dict."""
+    if job_id:
+        job_id = six.text_type(job_id)
+    if "error" in job_dict:
+        job_dict["error"] = json.dumps(_validate_error(job_dict["error"]))
+        job_dict["error"] = six.text_type(job_dict["error"])
+    if "data" in job_dict:
+        job_dict["data"] = six.text_type(job_dict["data"])
+    with ENGINE.connect() as conn:
+        conn.execute(
+            JOBS_TABLE.update().where(JOBS_TABLE.c.job_id == job_id).values(**job_dict)
+        )
+def mark_job_as_completed(job_id, data=None):
+    """Mark a job as completed successfully.
+    :param job_id: the job_id of the job to be updated
+    :type job_id: unicode
+    :param data: the output data returned by the job
+    :type data: any JSON-serializable type (including None)
+    """
+    update_dict = {
+        "status": "complete",
+        "data": json.dumps(data),
+        "finished_timestamp": datetime.datetime.utcnow(),
+    }
+    _update_job(job_id, update_dict)
+def mark_job_as_missed(job_id):
+    """Mark a job as missed because it was in the queue for too long.
+    :param job_id: the job_id of the job to be updated
+    :type job_id: unicode
+    """
+    update_dict = {
+        "status": "error",
+        "error": "Job delayed too long, service full",
+        "finished_timestamp": datetime.datetime.utcnow(),
+    }
+    _update_job(job_id, update_dict)
+def mark_job_as_errored(job_id, error_object):
+    """Mark a job as failed with an error.
+    :param job_id: the job_id of the job to be updated
+    :type job_id: unicode
+    :param error_object: the error returned by the job
+    :type error_object: either a string or a dict with a "message" key whose
+        value is a string
+    """
+    update_dict = {
+        "status": "error",
+        "error": error_object,
+        "finished_timestamp": datetime.datetime.utcnow(),
+    }
+    _update_job(job_id, update_dict)
+def mark_job_as_failed_to_post_result(job_id):
+    """Mark a job as 'failed to post result'.
+    This happens when a job completes (either successfully or with an error)
+    then trying to post the job result back to the job's callback URL fails.
+    FIXME: This overwrites any error from the job itself!
+    :param job_id: the job_id of the job to be updated
+    :type job_id: unicode
+    """
+    update_dict = {
+        "error": "Process completed but unable to post to result_url",
+    }
+    _update_job(job_id, update_dict)
+def _init_jobs_table():
+    """Initialise the "jobs" table in the db."""
+    _jobs_table = sqlalchemy.Table(
+        "jobs",
+        _METADATA,
+        sqlalchemy.Column("job_id", sqlalchemy.UnicodeText, primary_key=True),
+        sqlalchemy.Column("job_type", sqlalchemy.UnicodeText),
+        sqlalchemy.Column("status", sqlalchemy.UnicodeText, index=True),
+        sqlalchemy.Column("data", sqlalchemy.UnicodeText),
+        sqlalchemy.Column("error", sqlalchemy.UnicodeText),
+        sqlalchemy.Column("requested_timestamp", sqlalchemy.DateTime),
+        sqlalchemy.Column("finished_timestamp", sqlalchemy.DateTime),
+        sqlalchemy.Column("sent_data", sqlalchemy.UnicodeText),
+        # Callback URL:
+        sqlalchemy.Column("result_url", sqlalchemy.UnicodeText),
+    )
+    return _jobs_table
+def _init_metadata_table():
+    """Initialise the "metadata" table in the db."""
+    _metadata_table = sqlalchemy.Table(
+        "metadata",
+        _METADATA,
+        sqlalchemy.Column(
+            "job_id",
+            sqlalchemy.ForeignKey("jobs.job_id", ondelete="CASCADE"),
+            nullable=False,
+            primary_key=True,
+        ),
+        sqlalchemy.Column("key", sqlalchemy.UnicodeText, primary_key=True),
+        sqlalchemy.Column("value", sqlalchemy.UnicodeText, index=True),
+        sqlalchemy.Column("type", sqlalchemy.UnicodeText),
+    )
+    return _metadata_table
+def _init_logs_table():
+    """Initialise the "logs" table in the db."""
+    _logs_table = sqlalchemy.Table(
+        "logs",
+        _METADATA,
+        sqlalchemy.Column(
+            "job_id",
+            sqlalchemy.ForeignKey("jobs.job_id", ondelete="CASCADE"),
+            nullable=False,
+        ),
+        sqlalchemy.Column("timestamp", sqlalchemy.DateTime),
+        sqlalchemy.Column("message", sqlalchemy.UnicodeText),
+        sqlalchemy.Column("level", sqlalchemy.UnicodeText),
+        sqlalchemy.Column("module", sqlalchemy.UnicodeText),
+        sqlalchemy.Column("funcName", sqlalchemy.UnicodeText),
+        sqlalchemy.Column("lineno", sqlalchemy.Integer),
+    )
+    return _logs_table
+def _get_metadata(job_id):
+    """Return any metadata for the given job_id from the metadata table."""
+    job_id = six.text_type(job_id)
+    with ENGINE.connect() as conn:
+        results = conn.execute(
+            METADATA_TABLE.select().where(METADATA_TABLE.c.job_id == job_id)
+        ).fetchall()
+    metadata = {
+        row["key"]: json.loads(row["value"]) if row["type"] == "json" else row["value"]
+        for row in results
+    }
+    return metadata
+def _get_logs(job_id):
+    """Return any logs for the given job_id from the logs table."""
+    job_id = six.text_type(job_id)
+    with ENGINE.connect() as conn:
+        results = conn.execute(
+            LOGS_TABLE.select().where(LOGS_TABLE.c.job_id == job_id)
+        ).fetchall()
+    results = [dict(result) for result in results]
+    for result in results:
+        result.pop("job_id")
+    return results

ckanext/csvwmapandtransform/helpers.py ADDED Viewed

@@ -0,0 +1,67 @@
+# encoding: utf-8
+import re
+from typing import Any
+import ckan.plugins.toolkit as toolkit
+import requests
+log = __import__("logging").getLogger(__name__)
+def csvwmapandtransform__status_description(status: dict[str, Any]):
+    _ = toolkit._
+    if status.get("status"):
+        captions = {
+            "complete": _("Complete"),
+            "pending": _("Pending"),
+            "submitting": _("Submitting"),
+            "error": _("Error"),
+        }
+        return captions.get(status["status"], status["status"].capitalize())
+    else:
+        return _("Not Uploaded Yet")
+def common_member(a, b):
+    return any(i in b for i in a)
+def csvwmapandtransform_show_tools(resource):
+    formats = toolkit.config.get("ckanext.csvwmapandtransform.formats")
+    format_parts = re.split("/|;", resource["format"].lower().replace(" ", ""))
+    if common_member(format_parts, formats):
+        return True
+    else:
+        False
+def csvwmapandtransform_service_available():
+    url = toolkit.config.get("ckanext.csvwmapandtransform.maptomethod_url")
+    ssl_verify = toolkit.config.get("ckanext.csvwmapandtransform.ssl_verify")
+    # log.debug(f"mapomethodurl: {url} {bool(url)}")
+    if not url:
+        return False  # If EXTRACT_URL is not set, return False
+    try:
+        # Perform a HEAD request (lightweight check) to see if the service responds
+        response = requests.head(url, timeout=5, verify=ssl_verify)
+        # log.debug(f"reponse: {response}")
+        if (200 <= response.status_code < 400) or response.status_code == 405:
+            return True  # URL is reachable and returns a valid status code
+        else:
+            return False  # URL is reachable but response status is not valid
+    except requests.RequestException as e:
+        # If there's any issue (timeout, connection error, etc.)
+        # log.debug(e)
+        return False
+def get_helpers():
+    return {
+        "csvwmapandtransform__status_description": csvwmapandtransform__status_description,
+        "csvwmapandtransform_show_tools": csvwmapandtransform_show_tools,
+        "csvwmapandtransform_service_available": csvwmapandtransform_service_available,
+    }

ckanext/csvwmapandtransform/mapper.py ADDED Viewed

@@ -0,0 +1,92 @@
+import json
+import ckan.plugins.toolkit as toolkit
+import requests
+log = __import__("logging").getLogger(__name__)
+def post_request(url, headers, data, files=None):
+    ssl_verify = toolkit.config.get("ckanext.csvwmapandtransform.ssl_verify")
+    if not ssl_verify:
+        requests.packages.urllib3.disable_warnings()
+    try:
+        if files:
+            # should crate a multipart form upload
+            response = requests.post(
+                url, data=data, headers=headers, files=files, verify=ssl_verify
+            )
+        else:
+            # a application json post request
+            response = requests.post(
+                url, data=json.dumps(data), headers=headers, verify=ssl_verify
+            )
+        response.raise_for_status()
+    except Exception as e:
+        # placeholder for save file / clean-up
+        log.error(e)
+        return None
+        # raise SystemExit(e) from None
+    return response
+def check_mapping(map_url: str, data_url: str, authorization: None):
+    rdfconverter_url = toolkit.config.get(
+        "ckanext.csvwmapandtransform.rdfconverter_url"
+    )
+    log.debug("checking mapping at: {} with data url: {}".format(map_url, data_url))
+    # curl -X 'POST' 'http://docker-dev.iwm.fraunhofer.de:5003/api/checkmapping' -H 'accept: application/json' -H 'Content-Type: application/json' -d '{"data_url": "https://raw.githubusercontent.com/Mat-O-Lab/CSVToCSVW/main/examples/example-metadata.json", "mapping_url": "https://github.com/Mat-O-Lab/MapToMethod/raw/main/examples/example-map.yaml"}'
+    url = rdfconverter_url + "/api/checkmapping"
+    log.debug("rdf converter api call: {}".format(url))
+    data = {"mapping_url": map_url, "data_url": data_url}
+    headers = {"Content-Type": "application/json"}
+    if authorization:
+        headers["Authorization"] = authorization
+    r = post_request(url, headers, data)
+    # r=requests.get(rdfconverter_url+"/info")
+    # log.debug(r)
+    if r and r.status_code == 200:
+        res = r.json()
+        log.debug("map check results: {}".format(res))
+        return res
+    else:
+        log.debug("map check error: {}".format(r))
+        return None
+def get_joined_rdf(map_url: str, data_url: str, authorization: None):
+    log.debug("createing joined rdf: {} with data url: {}".format(map_url, data_url))
+    rdfconverter_url = toolkit.config.get(
+        "ckanext.csvwmapandtransform.rdfconverter_url"
+    )
+    url = rdfconverter_url + "/api/createrdf?return_type=turtle"
+    data = {"mapping_url": map_url, "data_url": data_url}
+    headers = {"Content-type": "application/json", "Accept": "application/json"}
+    if authorization:
+        headers["Authorization"] = authorization
+    log.debug(f"headers: {headers}")
+    r = post_request(url, headers, data)
+    if r and r.status_code == 200:
+        r = r.json()
+        filename = r["filename"]
+        print(
+            "applied {} mapping rules and skipped {}".format(
+                r["num_mappings_applied"], r["num_mappings_skipped"]
+            )
+        )
+        return (
+            filename,
+            r["graph"],
+            r["num_mappings_applied"],
+            r["num_mappings_skipped"],
+        )
+    else:
+        return (
+            None,
+            None,
+            None,
+            None,
+        )

ckanext/csvwmapandtransform/plugin.py ADDED Viewed

@@ -0,0 +1,140 @@
+import os
+import re
+import ckan.plugins as plugins
+import ckan.plugins.toolkit as toolkit
+from ckan import model
+from ckan.config.declaration import Declaration, Key
+from ckan.lib.plugins import DefaultTranslation
+if toolkit.check_ckan_version("2.10"):
+    from ckan.types import Context
+else:
+    class Context(dict):
+        def __init__(self, **kwargs):
+            super().__init__(**kwargs)
+from typing import Any
+from ckanext.csvwmapandtransform import action, auth, helpers, views
+log = __import__("logging").getLogger(__name__)
+class CsvwMapAndTransformPlugin(plugins.SingletonPlugin, DefaultTranslation):
+    plugins.implements(plugins.ITranslation)
+    plugins.implements(plugins.IConfigurer)
+    plugins.implements(plugins.IConfigDeclaration)
+    plugins.implements(plugins.ITemplateHelpers)
+    plugins.implements(plugins.IResourceUrlChange)
+    plugins.implements(plugins.IResourceController, inherit=True)
+    plugins.implements(plugins.IActions)
+    plugins.implements(plugins.IAuthFunctions)
+    plugins.implements(plugins.IBlueprint)
+    # IConfigurer
+    def update_config(self, config_):
+        toolkit.add_template_directory(config_, "templates")
+        toolkit.add_public_directory(config_, "public")
+        toolkit.add_resource("assets", "csvwmapandtransform")
+    # IConfigDeclaration
+    def declare_config_options(self, declaration: Declaration, key: Key):
+        declaration.annotate("csvwmapandtransform")
+        group = key.ckanext.csvwmapandtransform
+        declaration.declare_bool(group.ssl_verify, True)
+        declaration.declare(group.db_url, plugins.toolkit.config.get("sqlalchemy.url"))
+        declaration.declare(group.maptomethod_url, "https://maptomethod.matolab.org")
+        declaration.declare(group.rdfconverter_url, "https://rdfconverter.matolab.org")
+        declaration.declare(group.ckan_token, "")
+        declaration.declare(
+            group.formats, "json json-ld turtle n3 nt hext trig longturtle xml ld+json"
+        )
+    # IResourceUrlChange
+    def notify(self, resource: model.Resource):
+        context: Context = {"ignore_auth": True}
+        resource_dict = toolkit.get_action("resource_show")(
+            context,
+            {
+                "id": resource.id,
+            },
+        )
+        self._sumbit_transform(resource_dict)
+    # IResourceController
+    if not toolkit.check_ckan_version("2.10") or toolkit.check_ckan_version("2.11"):
+        def after_create(self, context, resource_dict):
+            self.after_resource_create(context, resource_dict)
+        # def before_show(self, resource_dict):
+        #     self.before_resource_show(resource_dict)
+        def after_update(self, context: Context, resource_dict: dict[str, Any]):
+            self._sumbit_transform(resource_dict)
+    def after_resource_create(self, context: Context, resource_dict: dict[str, Any]):
+        self._sumbit_transform(resource_dict)
+    def _sumbit_transform(self, resource_dict: dict[str, Any]):
+        context = {"model": model, "ignore_auth": True, "defer_commit": True}
+        formats = toolkit.config.get("ckanext.csvwmapandtransform.formats")
+        format = resource_dict.get("format", None)
+        submit = (
+            format
+            and format.lower() in formats
+            and "-joined" not in resource_dict["url"]
+        )
+        log.debug(
+            "Submitting resource {0} with format {1}".format(
+                resource_dict["id"], format
+            )
+            + " to csvwmapandtransform_transform"
+        )
+        if not submit:
+            return
+        try:
+            log.debug(
+                "Submitting resource {0}".format(resource_dict["id"])
+                + " to csvwmapandtransform_transform"
+            )
+            toolkit.get_action("csvwmapandtransform_transform")(
+                context, {"id": resource_dict["id"]}
+            )
+        except toolkit.ValidationError as e:
+            # If RDFConverter is offline want to catch error instead
+            # of raising otherwise resource save will fail with 500
+            log.critical(e)
+            pass
+    # ITemplateHelpers
+    def get_helpers(self):
+        return helpers.get_helpers()
+    # IActions
+    def get_actions(self):
+        actions = action.get_actions()
+        return actions
+    # IBlueprint
+    def get_blueprint(self):
+        return views.get_blueprint()
+    # IAuthFunctions
+    def get_auth_functions(self):
+        return auth.get_auth_functions()