ckanext-csvwmapandtransform 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ckanext/csvwmapandtransform/__init__.py +22 -0
- ckanext/csvwmapandtransform/action.py +405 -0
- ckanext/csvwmapandtransform/assets/.gitignore +0 -0
- ckanext/csvwmapandtransform/assets/script.js +81 -0
- ckanext/csvwmapandtransform/assets/style.css +124 -0
- ckanext/csvwmapandtransform/assets/webassets.yml +13 -0
- ckanext/csvwmapandtransform/auth.py +23 -0
- ckanext/csvwmapandtransform/cli.py +18 -0
- ckanext/csvwmapandtransform/db.py +397 -0
- ckanext/csvwmapandtransform/helpers.py +67 -0
- ckanext/csvwmapandtransform/i18n/.gitignore +0 -0
- ckanext/csvwmapandtransform/i18n/ckanext-csvwmapandtransform.pot +108 -0
- ckanext/csvwmapandtransform/i18n/de/LC_MESSAGES/ckanext-csvwmapandtransform.mo +0 -0
- ckanext/csvwmapandtransform/i18n/de/LC_MESSAGES/ckanext-csvwmapandtransform.po +113 -0
- ckanext/csvwmapandtransform/mapper.py +133 -0
- ckanext/csvwmapandtransform/plugin.py +140 -0
- ckanext/csvwmapandtransform/public/.gitignore +0 -0
- ckanext/csvwmapandtransform/public/dotted.png +0 -0
- ckanext/csvwmapandtransform/tasks.py +262 -0
- ckanext/csvwmapandtransform/templates/.gitignore +0 -0
- ckanext/csvwmapandtransform/templates/csvwmapandtransform/create_mapping.html +56 -0
- ckanext/csvwmapandtransform/templates/csvwmapandtransform/transform.html +108 -0
- ckanext/csvwmapandtransform/templates/package/resource_read.html +8 -0
- ckanext/csvwmapandtransform/templates/package/snippets/resource_item.html +23 -0
- ckanext/csvwmapandtransform/tests/__init__.py +0 -0
- ckanext/csvwmapandtransform/views.py +205 -0
- ckanext_csvwmapandtransform-0.0.1-py3.14-nspkg.pth +1 -0
- ckanext_csvwmapandtransform-0.0.1.dist-info/METADATA +121 -0
- ckanext_csvwmapandtransform-0.0.1.dist-info/RECORD +34 -0
- ckanext_csvwmapandtransform-0.0.1.dist-info/WHEEL +5 -0
- ckanext_csvwmapandtransform-0.0.1.dist-info/entry_points.txt +5 -0
- ckanext_csvwmapandtransform-0.0.1.dist-info/licenses/LICENSE +661 -0
- ckanext_csvwmapandtransform-0.0.1.dist-info/namespace_packages.txt +1 -0
- ckanext_csvwmapandtransform-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import ckan.plugins.toolkit as toolkit
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
log = __import__("logging").getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def post_request(url, headers, data, files=None):
    """POST ``data`` to ``url`` and return the response, or ``None`` on failure.

    When ``files`` is truthy, ``data`` is passed to ``requests.post`` unchanged
    together with ``files``; otherwise ``data`` is serialised with
    ``json.dumps`` and sent as the request body.  TLS verification follows the
    ``ckanext.csvwmapandtransform.ssl_verify`` config option.

    Args:
        url: Target URL.
        headers: Mapping of HTTP headers to send.
        data: Payload (form fields with ``files``, JSON-serialisable otherwise).
        files: Optional ``files`` argument forwarded to ``requests.post``.

    Returns:
        The ``requests.Response`` when the status is not an error status.
        HTTP error statuses, connection problems, timeouts and any other
        exception are logged and reported as ``None``; nothing is raised.
    """
    ssl_verify = toolkit.config.get("ckanext.csvwmapandtransform.ssl_verify")
    if not ssl_verify:
        requests.packages.urllib3.disable_warnings()

    try:
        if files:
            # data + files together
            response = requests.post(
                url, data=data, headers=headers, files=files, verify=ssl_verify
            )
        else:
            # plain JSON body
            response = requests.post(
                url, data=json.dumps(data), headers=headers, verify=ssl_verify
            )
        response.raise_for_status()
        return response

    except requests.exceptions.HTTPError as e:
        # A Response object is falsy for 4xx/5xx statuses, so test against
        # None explicitly; a plain truthiness check would hide the details of
        # exactly the responses this handler exists to report.
        failed = e.response
        if failed is not None:
            status = failed.status_code
            error_body = failed.text[:500] if failed.text else "No response body"
        else:
            status = "Unknown"
            error_body = "No response body"
        log.error(f"HTTP Error {status} calling {url}: {error_body}")
        return None
    except requests.exceptions.ConnectionError as e:
        log.error(f"Connection Error calling {url}: {str(e)}")
        return None
    except requests.exceptions.Timeout as e:
        log.error(f"Timeout calling {url}: {str(e)}")
        return None
    except Exception as e:
        log.error(f"Unexpected error calling {url}: {type(e).__name__}: {str(e)}")
        return None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def check_mapping(map_url: str, data_url: str, authorization: "str | None" = None):
    """Ask the RDF converter whether a mapping fits a data document.

    Posts ``{"mapping_url": ..., "data_url": ...}`` as JSON to
    ``<rdfconverter_url>/api/checkmapping``, where ``rdfconverter_url`` is the
    ``ckanext.csvwmapandtransform.rdfconverter_url`` config option.

    Args:
        map_url: URL of the mapping document.
        data_url: URL of the data document to test the mapping against.
        authorization: Optional value for the ``Authorization`` header.

    Returns:
        The decoded JSON body of a 200 response, or ``None`` when the request
        failed, the status was not 200, or the body was not valid JSON.
    """
    rdfconverter_url = toolkit.config.get(
        "ckanext.csvwmapandtransform.rdfconverter_url"
    )
    log.debug("checking mapping at: {} with data url: {}".format(map_url, data_url))
    url = rdfconverter_url + "/api/checkmapping"
    log.debug("rdf converter api call: {}".format(url))
    data = {"mapping_url": map_url, "data_url": data_url}
    headers = {"Content-Type": "application/json"}
    if authorization:
        headers["Authorization"] = authorization

    r = post_request(url, headers, data)
    if r is None or r.status_code != 200:
        log.debug("map check error: {}".format(r))
        return None

    try:
        res = r.json()
    except ValueError as e:
        # A 200 with a non-JSON body must not abort the caller; treat it like
        # any other failed check.
        log.error(
            f"Failed to parse mapping check response: {type(e).__name__}: {str(e)}. "
            f"Response: {r.text[:500]}"
        )
        return None
    log.debug("map check results: {}".format(res))
    return res
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_joined_rdf(map_url: str, data_url: str, authorization: "str | None" = None):
    """Request the joined RDF graph for a mapping/data pair from the RDF converter.

    Posts ``{"mapping_url": ..., "data_url": ...}`` as JSON to
    ``<rdfconverter_url>/api/createrdf?return_type=turtle``.

    Args:
        map_url: URL of the mapping document.
        data_url: URL of the data document.
        authorization: Optional value for the ``Authorization`` header.

    Returns:
        A 4-tuple ``(filename, graph, num_mappings_applied,
        num_mappings_skipped)`` read from the JSON response.  Every failure
        (no response, non-200 status, unparsable body, missing ``filename`` or
        ``graph``) is logged and reported as ``(None, None, None, None)``.
    """
    failure = (None, None, None, None)

    log.info(f"Creating joined RDF with mapping: {map_url} and data: {data_url}")
    rdfconverter_url = toolkit.config.get(
        "ckanext.csvwmapandtransform.rdfconverter_url"
    )
    url = rdfconverter_url + "/api/createrdf?return_type=turtle"
    data = {"mapping_url": map_url, "data_url": data_url}
    headers = {"Content-type": "application/json", "Accept": "application/json"}
    if authorization:
        headers["Authorization"] = authorization

    # Never write the credential itself to the log.
    loggable_headers = {
        name: ("<redacted>" if name.lower() == "authorization" else value)
        for name, value in headers.items()
    }
    log.debug(f"Request headers: {loggable_headers}")
    log.debug(f"Request data: {data}")

    r = post_request(url, headers, data)

    if r is None:
        log.error(
            f"Failed to get response from RDF converter at {url}. "
            f"Mapping: {map_url}, Data: {data_url}"
        )
        return failure

    if r.status_code != 200:
        error_msg = r.text[:500] if r.text else "No error message"
        log.error(
            f"RDF converter returned status {r.status_code}. "
            f"Mapping: {map_url}, Data: {data_url}. "
            f"Error: {error_msg}"
        )
        return failure

    try:
        response_json = r.json()
        filename = response_json.get("filename")
        graph = response_json.get("graph")
        num_applied = response_json.get("num_mappings_applied")
        num_skipped = response_json.get("num_mappings_skipped")
    except (ValueError, KeyError, AttributeError) as e:
        # AttributeError covers a JSON body that is not an object (no .get).
        log.error(
            f"Failed to parse RDF converter response: {type(e).__name__}: {str(e)}. "
            f"Response: {r.text[:500]}"
        )
        return failure

    if not filename or not graph:
        log.error(
            f"RDF converter returned incomplete response. "
            f"Filename: {filename}, Graph present: {bool(graph)}"
        )
        return failure

    log.info(
        f"Successfully created RDF: {filename} "
        f"(applied {num_applied} rules, skipped {num_skipped})"
    )
    return (filename, graph, num_applied, num_skipped)
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
import ckan.plugins as plugins
|
|
5
|
+
import ckan.plugins.toolkit as toolkit
|
|
6
|
+
from ckan import model
|
|
7
|
+
from ckan.config.declaration import Declaration, Key
|
|
8
|
+
from ckan.lib.plugins import DefaultTranslation
|
|
9
|
+
|
|
10
|
+
if not toolkit.check_ckan_version("2.10"):

    class Context(dict):
        """Plain-dict stand-in used when ``ckan.types.Context`` is not imported."""

        def __init__(self, **kwargs):
            super().__init__(**kwargs)

else:
    from ckan.types import Context
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from ckanext.csvwmapandtransform import action, auth, helpers, views
|
|
22
|
+
|
|
23
|
+
log = __import__("logging").getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class CsvwMapAndTransformPlugin(plugins.SingletonPlugin, DefaultTranslation):
|
|
27
|
+
plugins.implements(plugins.ITranslation)
|
|
28
|
+
plugins.implements(plugins.IConfigurer)
|
|
29
|
+
plugins.implements(plugins.IConfigDeclaration)
|
|
30
|
+
plugins.implements(plugins.ITemplateHelpers)
|
|
31
|
+
plugins.implements(plugins.IResourceUrlChange)
|
|
32
|
+
plugins.implements(plugins.IResourceController, inherit=True)
|
|
33
|
+
plugins.implements(plugins.IActions)
|
|
34
|
+
plugins.implements(plugins.IAuthFunctions)
|
|
35
|
+
plugins.implements(plugins.IBlueprint)
|
|
36
|
+
|
|
37
|
+
# IConfigurer
|
|
38
|
+
|
|
39
|
+
def update_config(self, config_):
    """Register this extension's template, public and asset directories."""
    for register, directory in (
        (toolkit.add_template_directory, "templates"),
        (toolkit.add_public_directory, "public"),
    ):
        register(config_, directory)
    toolkit.add_resource("assets", "csvwmapandtransform")
|
|
43
|
+
|
|
44
|
+
# IConfigDeclaration
|
|
45
|
+
|
|
46
|
+
def declare_config_options(self, declaration: Declaration, key: Key):
    """Declare the ``ckanext.csvwmapandtransform.*`` options and their defaults."""
    declaration.annotate("csvwmapandtransform")
    options = key.ckanext.csvwmapandtransform

    # The only boolean option.
    declaration.declare_bool(options.ssl_verify, True)

    # Remaining options, declared in the same order as before.
    plain_defaults = (
        (options.db_url, plugins.toolkit.config.get("sqlalchemy.url")),
        (options.maptomethod_url, "https://maptomethod.matolab.org"),
        (options.rdfconverter_url, "https://rdfconverter.matolab.org"),
        (options.ckan_token, ""),
        (
            options.formats,
            "json json-ld turtle n3 nt hext trig longturtle xml ld+json",
        ),
    )
    for option, default in plain_defaults:
        declaration.declare(option, default)
|
|
58
|
+
|
|
59
|
+
# IResourceUrlChange
|
|
60
|
+
|
|
61
|
+
def notify(self, resource: model.Resource):
    """Load the resource via ``resource_show`` (auth ignored) and submit it."""
    show_resource = toolkit.get_action("resource_show")
    context: Context = {"ignore_auth": True}
    resource_dict = show_resource(context, {"id": resource.id})
    self._sumbit_transform(resource_dict)
|
|
70
|
+
|
|
71
|
+
# IResourceController
|
|
72
|
+
|
|
73
|
+
if not toolkit.check_ckan_version("2.10") or toolkit.check_ckan_version("2.11"):
|
|
74
|
+
|
|
75
|
+
def after_create(self, context, resource_dict):
    """Forward to ``after_resource_create`` with the same arguments."""
    self.after_resource_create(context, resource_dict)
|
|
77
|
+
|
|
78
|
+
# def before_show(self, resource_dict):
|
|
79
|
+
# self.before_resource_show(resource_dict)
|
|
80
|
+
|
|
81
|
+
def after_update(self, context: Context, resource_dict: dict[str, Any]):
    """Submit the updated resource for transformation (``context`` is unused)."""
    self._sumbit_transform(resource_dict)
|
|
83
|
+
|
|
84
|
+
def after_resource_create(self, context: Context, resource_dict: dict[str, Any]):
    """Submit the newly created resource for transformation (``context`` is unused)."""
    self._sumbit_transform(resource_dict)
|
|
86
|
+
|
|
87
|
+
def _sumbit_transform(self, resource_dict: dict[str, Any]):
    """Call ``csvwmapandtransform_transform`` for a resource if it qualifies.

    A resource qualifies when it has a format, that format (lower-cased) is
    one of the entries of ``ckanext.csvwmapandtransform.formats``, and its url
    does not contain ``"-joined"``.

    A ``toolkit.ValidationError`` from the action is logged and swallowed so
    that saving the resource does not fail.
    """
    context = {"model": model, "ignore_auth": True, "defer_commit": True}

    configured = toolkit.config.get("ckanext.csvwmapandtransform.formats") or ""
    # The option is declared as one whitespace-separated string.  Split it so
    # membership is tested per entry; testing against the raw string would
    # accept any substring such as "t" or "js".
    if isinstance(configured, str):
        configured = configured.split()
    supported_formats = set(configured)

    res_format = resource_dict.get("format", None)
    res_url = resource_dict.get("url") or ""
    submit = (
        bool(res_format)
        and res_format.lower() in supported_formats
        and "-joined" not in res_url
    )
    log.debug(
        "Submitting resource {0} with format {1}".format(
            resource_dict["id"], res_format
        )
        + " to csvwmapandtransform_transform"
    )

    if not submit:
        return

    try:
        log.debug(
            "Submitting resource {0}".format(resource_dict["id"])
            + " to csvwmapandtransform_transform"
        )
        toolkit.get_action("csvwmapandtransform_transform")(
            context, {"id": resource_dict["id"]}
        )
    except toolkit.ValidationError as e:
        # If RDFConverter is offline we want to catch the error instead of
        # raising, otherwise the resource save will fail with a 500.
        log.critical(e)
|
|
120
|
+
|
|
121
|
+
# ITemplateHelpers
|
|
122
|
+
|
|
123
|
+
def get_helpers(self):
    """Return whatever ``helpers.get_helpers()`` provides."""
    template_helpers = helpers.get_helpers()
    return template_helpers
|
|
125
|
+
|
|
126
|
+
# IActions
|
|
127
|
+
|
|
128
|
+
def get_actions(self):
    """Return whatever ``action.get_actions()`` provides."""
    return action.get_actions()
|
|
131
|
+
|
|
132
|
+
# IBlueprint
|
|
133
|
+
|
|
134
|
+
def get_blueprint(self):
    """Return whatever ``views.get_blueprint()`` provides."""
    blueprint = views.get_blueprint()
    return blueprint
|
|
136
|
+
|
|
137
|
+
# IAuthFunctions
|
|
138
|
+
|
|
139
|
+
def get_auth_functions(self):
    """Return whatever ``auth.get_auth_functions()`` provides."""
    auth_functions = auth.get_auth_functions()
    return auth_functions
|
|
File without changes
|
|
Binary file
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import json
|
|
3
|
+
import tempfile
|
|
4
|
+
|
|
5
|
+
import ckanapi
|
|
6
|
+
import ckanapi.datapackage
|
|
7
|
+
import requests
|
|
8
|
+
from ckan import model
|
|
9
|
+
from ckan.plugins.toolkit import asbool, config, get_action
|
|
10
|
+
|
|
11
|
+
# from ckanext.csvtocsvw.annotate import annotate_csv_upload
|
|
12
|
+
from ckanext.csvwmapandtransform import db, mapper
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
from urllib.parse import urlsplit
|
|
16
|
+
except ImportError:
|
|
17
|
+
from urlparse import urlsplit
|
|
18
|
+
|
|
19
|
+
# log = __import__("logging").getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
CHUNK_INSERT_ROWS = 250
|
|
22
|
+
|
|
23
|
+
from rq import get_current_job
|
|
24
|
+
from werkzeug.datastructures import FileStorage as FlaskFileStorage
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def transform(
    res_url, res_id, dataset_id, callback_url, last_updated, skip_if_no_changes=True
):
    """Pick the best mapping for a resource, build the joined RDF and upload it.

    The job id is taken from ``get_current_job()``.  The function

    1. loads the resource and reports the job as "running" to ``callback_url``,
    2. tests every mapping returned by ``csvwmapandtransform_find_mappings``
       with ``mapper.check_mapping`` and rates each usable one as
       ``rules_applicable - rules_skipped``,
    3. applies the highest rated mapping (if its rating is positive) with
       ``mapper.get_joined_rdf``,
    4. stores the graph as a resource of ``dataset_id``: created when no
       resource with the same name exists, updated otherwise,
    5. reports the job as "complete" to ``callback_url``.

    Args:
        res_url: Recorded as ``original_url`` in the job metadata.
        res_id: Id of the resource to transform.
        dataset_id: Id of the dataset that receives the generated resource.
        callback_url: URL the job status is posted to.
        last_updated: Recorded as ``task_created`` in the job metadata.
        skip_if_no_changes: Accepted for interface compatibility; not used here.

    Returns:
        ``"error"`` when RDF generation failed, otherwise ``None``.
    """
    tomap_res = get_action("resource_show")({"ignore_auth": True}, {"id": res_id})
    metadata = {
        "ckan_url": config.get("ckan.site_url"),
        "resource_id": res_id,
        "task_created": last_updated,
        "original_url": res_url,
    }
    token = config.get("ckanext.csvwmapandtransform.ckan_token")
    job_info = dict()
    job_dict = dict(metadata=metadata, status="running", job_info=job_info)
    job_id = get_current_job().id
    errored = False
    db.init()

    # Set up logging to the db, and also show logs on stderr.
    handler = StoringHandler(job_id, job_dict)
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(logging.Formatter("%(message)s"))
    logger = logging.getLogger(job_id)
    logger.addHandler(handler)
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.DEBUG)

    callback_csvwmapandtransform_hook(callback_url, api_key=token, job_dict=job_dict)
    logger.info("Trying to find fitting mapping for: {}".format(tomap_res["url"]))
    mappings = get_action("csvwmapandtransform_find_mappings")({}, {})
    mapping_urls = [res["url"] for res in mappings]
    logger.info("Mappings found: {}".format(mapping_urls))

    logger.info("testing mappings with: {}".format(tomap_res["url"]))
    res = [
        {
            "mapping": map_url,
            "test": mapper.check_mapping(
                map_url=map_url,
                data_url=tomap_res["url"],
                authorization=token,
            ),
        }
        for map_url in mapping_urls
    ]
    # Drop mappings whose check produced no result.
    valid_items = [item for item in res if item["test"]]
    for item in valid_items:
        # The more rules can be applied and the fewer are skipped, the better
        # the mapping.
        item["rating"] = (
            item["test"]["rules_applicable"] - item["test"]["rules_skipped"]
        )
    sorted_list = sorted(valid_items, key=lambda x: x["rating"], reverse=True)
    logger.info("Rated mappings: {}".format(sorted_list))
    callback_csvwmapandtransform_hook(callback_url, api_key=token, job_dict=job_dict)

    # The best candidate is the first entry, provided it applies at least one
    # rule net of skips.
    if sorted_list and sorted_list[0]["rating"] > 0:
        best_condidate = sorted_list[0]["mapping"]
    else:
        best_condidate = None

    if best_condidate:
        logger.info(f"Applying best mapping candidate: {best_condidate}")
        filename, graph_data, num_applied, num_skipped = mapper.get_joined_rdf(
            map_url=best_condidate,
            data_url=tomap_res["url"],
            authorization=token,
        )
        if not filename:
            errored = True
            logger.error(
                f"Failed to generate RDF from mapping {best_condidate} for resource {tomap_res['url']}. "
                "Check previous error messages for details."
            )
        else:
            # Split off the extension without assuming the name contains a
            # dot; unpacking a plain rsplit would raise for a dot-less name.
            if "." in filename:
                prefix, suffix = filename.rsplit(".", 1)
            else:
                prefix, suffix = filename, ""
            prefix = prefix or "unnamed"
            suffix = suffix or "ttl"

            # Upload the graph to CKAN as a new or updated resource.
            ressouce_existing = resource_search(dataset_id, filename)
            with tempfile.NamedTemporaryFile(
                prefix=prefix, suffix="." + suffix
            ) as graph_file:
                graph_file.write(graph_data.encode("utf-8"))
                # Make the bytes visible to the second handle opened below.
                graph_file.flush()
                # Close the read handle once the action has returned instead
                # of leaving it open after the temporary file is gone.
                with open(graph_file.name, "rb") as upload_stream:
                    upload = FlaskFileStorage(upload_stream, filename)
                    resource = dict(
                        package_id=dataset_id,
                        upload=upload,
                        name=filename,
                        format="text/turtle; charset=utf-8",
                    )
                    if not ressouce_existing:
                        logger.info(
                            "Writing new resource {} to dataset {}".format(
                                filename, dataset_id
                            )
                        )
                        metadata_res = get_action("resource_create")(
                            {"ignore_auth": True}, resource
                        )
                    else:
                        logger.info(
                            "Updating resource - {}".format(ressouce_existing["url"])
                        )
                        resource["id"] = ressouce_existing["id"]
                        metadata_res = get_action("resource_update")(
                            {"ignore_auth": True}, resource
                        )
            logger.info("job completed results at {}".format(metadata_res["url"]))
    else:
        logger.warning(
            "found no mapping candidate for resource {}".format(tomap_res["url"])
        )

    # All is done; update the job status.
    job_dict["status"] = "complete"
    callback_csvwmapandtransform_hook(callback_url, api_key=token, job_dict=job_dict)
    return "error" if errored else None
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def get_resource(id):
    """Return the ``resource_show`` result for ``id``, or ``False`` on any error.

    The lookup goes through ``ckanapi.LocalCKAN``.
    """
    local_ckan = ckanapi.LocalCKAN()
    try:
        return local_ckan.action.resource_show(id=id)
    except Exception:
        # Ordinary errors still mean "no resource"; interpreter-level
        # exceptions such as KeyboardInterrupt are no longer swallowed.
        return False
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def resource_search(dataset_id, res_name):
    """Return the first resource of ``dataset_id`` named ``res_name``, else ``None``.

    The dataset is read with ``package_show`` through ``ckanapi.LocalCKAN``.
    """
    dataset = ckanapi.LocalCKAN().action.package_show(id=dataset_id)
    matching = (
        candidate
        for candidate in dataset["resources"]
        if candidate["name"] == res_name
    )
    return next(matching, None)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def callback_csvwmapandtransform_hook(result_url, api_key, job_dict):
    """Tells CKAN about the result of the csvwmapandtransform (i.e. calls the callback
    function 'csvwmapandtransform_hook'). Usually called by the csvwmapandtransform queue job.

    Args:
        result_url: URL the job state is posted to.
        api_key: Either a bare key, sent as the ``Authorization`` header, or
            ``"<Header-Name>:<key>"`` to choose the header name.  Only the
            first colon separates name and key.
        job_dict: Job state; serialised with ``DatetimeJsonEncoder``.

    Returns:
        ``True`` when the endpoint answered with status 200, ``False`` for any
        other status or when the connection failed.
    """
    headers = {"Content-Type": "application/json"}
    if api_key:
        if ":" in api_key:
            # maxsplit=1: a key that itself contains ":" must stay intact
            # rather than break the two-value unpacking.
            header, key = api_key.split(":", 1)
        else:
            header, key = "Authorization", api_key
        headers[header] = key

    ssl_verify = config.get("ckanext.csvwmapandtransform.ssl_verify")
    if not ssl_verify:
        requests.packages.urllib3.disable_warnings()

    try:
        result = requests.post(
            result_url,
            data=json.dumps(job_dict, cls=DatetimeJsonEncoder),
            verify=ssl_verify,
            headers=headers,
        )
    except requests.ConnectionError:
        return False

    return result.status_code == requests.codes.ok
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
import logging
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
class StoringHandler(logging.Handler):
    """A handler that stores the logging records in a database.

    Each record is inserted into ``db.LOGS_TABLE`` over a connection obtained
    from ``db.ENGINE``, tagged with the task id given at construction.
    """

    def __init__(self, task_id, input):
        """Remember the task id used as ``job_id`` and the job input."""
        logging.Handler.__init__(self)
        self.task_id = task_id
        self.input = input

    def emit(self, record):
        """Insert ``record`` into the logs table.

        Failures, including failing to connect, are passed to
        ``handleError`` instead of being raised into the code that logged
        the message or dropped without a trace.
        """
        try:
            conn = db.ENGINE.connect()
            try:
                # Pass plain str values for the text columns.
                message = str(record.getMessage())
                level = str(record.levelname)
                module = str(record.module)
                funcName = str(record.funcName)

                conn.execute(
                    db.LOGS_TABLE.insert().values(
                        job_id=self.task_id,
                        timestamp=datetime.datetime.utcnow(),
                        message=message,
                        level=level,
                        module=module,
                        funcName=funcName,
                        lineno=record.lineno,
                    )
                )
            finally:
                conn.close()
        except Exception:
            self.handleError(record)
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
class DatetimeJsonEncoder(json.JSONEncoder):
    """JSON encoder that writes ``datetime.datetime`` values as ISO format strings."""

    def default(self, obj):
        """Return ``obj.isoformat()`` for datetimes; defer to the base class otherwise."""
        if not isinstance(obj, datetime.datetime):
            return super().default(obj)
        return obj.isoformat()
|
|
File without changes
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{% extends "package/base.html" %}
|
|
2
|
+
|
|
3
|
+
{% set logged_in = true if c.userobj else false %}
|
|
4
|
+
{% set res = resource %}
|
|
5
|
+
|
|
6
|
+
{% block breadcrumb_content_selected %}{% endblock %}
|
|
7
|
+
|
|
8
|
+
{% block breadcrumb_content %}
|
|
9
|
+
{{ super() }}
|
|
10
|
+
{% if res %}
|
|
11
|
+
<li>{% link_for h.resource_display_name(res)|truncate(30), named_route=pkg.type ~ '_resource.read', id=pkg.name,
|
|
12
|
+
resource_id=res.id %}</li>
|
|
13
|
+
<li{% block breadcrumb_edit_selected %} class="active" {% endblock %}><a href="">{{ _('Map') }}</a></li>
|
|
14
|
+
{% endif %}
|
|
15
|
+
{% endblock %}
|
|
16
|
+
|
|
17
|
+
{% block content_action %}
|
|
18
|
+
{% if res %}
|
|
19
|
+
{% link_for _('View resource'), named_route=pkg.type ~ '_resource.read', id=pkg.name, resource_id=res.id, class_='btn
|
|
20
|
+
btn-default', icon='eye' %}
|
|
21
|
+
{% endif %}
|
|
22
|
+
{% endblock %}
|
|
23
|
+
|
|
24
|
+
{% block content_primary_nav %}
|
|
25
|
+
{% endblock %}
|
|
26
|
+
|
|
27
|
+
{% block primary_content_inner %}
|
|
28
|
+
<h1>{% block form_title %}{{ _('Map resource') }}{% endblock %}</h1>
|
|
29
|
+
<p>Create a rule-based mapping by filling the form below. It will query the given metadata file and the graph template
|
|
30
|
+
by the Class IRI set for subjects and objects. When clicking "Start Mapping" select widgets will spawn allowing you
|
|
31
|
+
to map a subject to an object. The resulting YAML file will contain a ruleset for each of the assertions made, and
|
|
32
|
+
when run create triples connecting the subject meeting the condition by the predicate IRI given. Download the file,
|
|
33
|
+
make changes if needed and upload it to the "mappings" group here in CKAN if you want to make use of the automated
|
|
34
|
+
mapping process applying the mapping.</p>
|
|
35
|
+
{% block form %}
|
|
36
|
+
{% endblock %}
|
|
37
|
+
<iframe class="col-12" name="my-iframe"
|
|
38
|
+
src="{{iframe_url}}" onload='javascript:(function(o){o.style.height=o.contentWindow.document.body.scrollHeight+"px";}(this));' style="height:1100px;width:100%;border:none;overflow:hidden;"></iframe>
|
|
40
|
+
|
|
41
|
+
{% endblock %}
|
|
42
|
+
|
|
43
|
+
{% block secondary_content %}
|
|
44
|
+
{% snippet 'package/snippets/resource_info.html', res=res %}
|
|
45
|
+
{% snippet 'package/snippets/resources.html', pkg=pkg, active=res.id %}
|
|
46
|
+
{% endblock %}
|
|
47
|
+
|
|
48
|
+
{% block scripts %}
|
|
49
|
+
{{ super() }}
|
|
50
|
+
<!-- <script>
|
|
51
|
+
document.addEventListener("DOMContentLoaded", function() {
|
|
52
|
+
document.getElementById("my-form").submit();
|
|
53
|
+
});
|
|
54
|
+
</script> -->
|
|
55
|
+
{% asset 'vendor/fileupload' %}
|
|
56
|
+
{% endblock %}
|