maplib 0.19.3__cp310-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maplib/.gitignore +1 -0
- maplib/__init__.py +75 -0
- maplib/__init__.pyi +1023 -0
- maplib/adding_triples.py +29 -0
- maplib/maplib.pyd +0 -0
- maplib/py.typed +0 -0
- maplib/template_generator/__init__.py +1 -0
- maplib/template_generator/generate.py +234 -0
- maplib/template_generator/ordering.py +40 -0
- maplib-0.19.3.dist-info/METADATA +207 -0
- maplib-0.19.3.dist-info/RECORD +13 -0
- maplib-0.19.3.dist-info/WHEEL +4 -0
- maplib-0.19.3.dist-info/licenses/LICENSE +202 -0
maplib/adding_triples.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from maplib.maplib import Model, Template, IRI, Triple, Variable
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def add_triples(
    source: Model, target: Model, source_graph: str = None, target_graph: str = None
):
    """(Zero) copy the triples from one Model into another.

    For every predicate IRI reported by ``source.get_predicate_iris``, a
    temporary template ``urn:maplib:tmp`` with the single triple pattern
    ``?subject <predicate> ?object`` is built. Each result returned by
    ``source.get_predicate`` for that predicate is then expanded into
    ``target`` through that template, passing along its ``mappings`` and
    ``rdf_types``.

    :param source: The source model
    :param target: The target model
    :param source_graph: The named graph in the source model to copy from. None means default graph.
    :param target_graph: The named graph in the target model to copy into. None means default graph.
    """
    # The two template variables are the same for every predicate, so they are
    # created once. The local is deliberately not called `object`, which would
    # shadow the builtin.
    subject_var = Variable("subject")
    object_var = Variable("object")

    for predicate in source.get_predicate_iris(source_graph):
        template = Template(
            iri=IRI("urn:maplib:tmp"),
            parameters=[subject_var, object_var],
            instances=[Triple(subject_var, predicate, object_var)],
        )
        for sm in source.get_predicate(predicate, source_graph):
            target.map(
                template,
                sm.mappings,
                types=sm.rdf_types,
                graph=target_graph,
            )
|
maplib/maplib.pyd
ADDED
|
Binary file
|
maplib/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .generate import generate_templates
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
from typing import Dict, Optional
|
|
2
|
+
from .ordering import topological_sort
|
|
3
|
+
import polars as pl
|
|
4
|
+
|
|
5
|
+
from maplib import Model, Variable, RDFType, Parameter, Triple, IRI, Template
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def generate_templates(m: Model, graph: Optional[str] = None) -> Dict[str, Template]:
    """Generate templates for instantiating the classes in an ontology

    :param m: The model where the ontology is stored. We mainly rely on rdfs:subClassOf, rdfs:range and rdfs:domain.
    :param graph: The named graph where the ontology is stored. The value is passed
        unchanged to the queries run against ``m``; it defaults to None.

    :return: A dictionary of templates for instantiating the classes in the ontology.
        For each class there are two entries: the class URI maps to a template that
        also emits the ``rdf:type`` triple, and the class URI suffixed with
        ``_notype`` maps to the same template without that triple.

    Usage example - note that it is important to add the templates to the Model you want to populate.
    >>> from maplib import Model
    >>> from maplib.template_generator import generate_templates
    >>>
    >>> m_ont = Model()
    >>> m_ont.read("my_ontology.ttl")
    >>> templates = generate_templates(m_ont)
    >>> m = Model()
    >>> for t in templates.values():
    ...     m.add_template(t)
    >>> m.map("https://example.net/MyClass", df)
    """

    # Index the property rows both by their domain and by their range.
    properties = get_properties(m, graph=graph)
    properties_by_domain = {}
    properties_by_range = {}
    for row in properties.iter_rows(named=True):
        properties_by_domain.setdefault(row["domain"], []).append(row)
        properties_by_range.setdefault(row["range"], []).append(row)

    # Map every child class to the list of its direct parents.
    subclasses = get_subclasses(m, graph=graph)
    subclass_of = {
        row["child"]: row["parents"]
        for row in subclasses.group_by("child")
        .agg(pl.col("parent").alias("parents"))
        .iter_rows(named=True)
    }

    class_ordering = topological_sort(subclasses)

    templates_without_typing = generate_templates_without_typing(
        properties_by_domain, properties_by_range, class_ordering, subclass_of
    )
    templates_with_typing = generate_templates_with_typing(templates_without_typing)

    templates = {}
    for class_iri, template in templates_without_typing.items():
        templates[class_iri + "_notype"] = template
    for class_iri, template in templates_with_typing.items():
        templates[class_iri] = template

    return templates
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def get_properties(m: Model, graph: Optional[str]) -> pl.DataFrame:
    """Query the ontology for properties together with their type, domain and range.

    Two queries are run against ``m`` in ``graph``:

    * properties that have a type, an ``rdfs:domain`` and an ``rdfs:range``,
      keeping only rows where both domain and range are IRIs;
    * properties referenced via ``owl:onProperty`` from an ``owl:Restriction``
      that some class is declared an ``rdfs:subClassOf``. For these rows
      ``property_type`` is overwritten with a fixed string and ``range`` is
      set to a null string column.

    :param m: The model to query.
    :param graph: The named graph to query, passed through to ``m.query``.
    :return: The first result if the second is empty, the second if the first
        is empty, otherwise the first with the second stacked below it.
    """
    declared = m.query(
        """
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>

    SELECT ?property ?property_type ?domain ?range WHERE {
        ?property a ?property_type .
        ?property rdfs:domain ?domain .
        ?property rdfs:range ?range .
        FILTER(ISIRI(?domain) && ISIRI(?range))
    }
    """,
        native_dataframe=True,
        graph=graph,
    )
    # NOTE(review): this sets a polars formatting option (pl.Config) as a side
    # effect of calling this function - confirm that is intended.
    pl.Config.set_fmt_str_lengths(100)
    restricted = m.query(
        """
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>

    SELECT ?property ?property_type ?domain WHERE {
        ?domain rdfs:subClassOf ?restr .
        ?restr a owl:Restriction .
        ?restr owl:onProperty ?property .
        #?property a ?property_type .
    }
    """,
        native_dataframe=True,
        graph=graph,
    )
    # Give the restriction rows the same four columns as the declared rows.
    placeholder_type = pl.lit("http://www.w3.org/2000/01/rdf-schema#ObjectType")
    restricted = restricted.with_columns(
        placeholder_type.alias("property_type"),
        pl.lit(None).cast(pl.String).alias("range"),
    )

    if declared.height == 0:
        return restricted
    if restricted.height == 0:
        return declared
    return declared.vstack(restricted)
|
|
116
|
+
|
|
117
|
+
def get_subclasses(m: Model, graph: Optional[str]) -> pl.DataFrame:
    """Query ``m`` for all ``rdfs:subClassOf`` pairs where both classes are IRIs.

    :param m: The model to query.
    :param graph: The named graph to query, passed through to ``m.query``.
    :return: The query result, with the selected variables ``child`` and ``parent``.
    """
    subclass_query = """
    PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?child ?parent WHERE {
        ?child rdfs:subClassOf ?parent .
        FILTER(ISIRI(?child) && ISIRI(?parent))
    }
    """
    return m.query(subclass_query, native_dataframe=True, graph=graph)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def uri_to_variable(uri: str) -> Variable:
    """Derive a template variable from the local name of a URI.

    The local name is the text after the last ``/`` in ``uri`` and, within
    that, after the last ``#``. Every ``-`` and ``.`` in it is replaced by
    ``_`` before it is used as the variable name.

    :param uri: The URI to derive the variable name from.
    :return: A Variable named after the cleaned local name.
    """
    last_segment = uri.rpartition("/")[2]
    local_name = last_segment.rpartition("#")[2]
    return Variable(local_name.translate(str.maketrans("-.", "__")))
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def generate_templates_without_typing(
    properties_by_domain,
    properties_by_range,
    class_ordering,
    subclass_of: Dict[str, list[str]],
) -> Dict[str, Template]:
    """Build one ``<class IRI>_notype`` template per class, without an rdf:type triple.

    Each template takes the mandatory subject parameter ``id`` plus one
    optional parameter per property:

    * for properties whose domain is the class, the triple is
      ``id <property> value``;
    * for properties whose range is the class, the triple is
      ``value <property> id``.

    For every direct parent that already has a template, an instance of the
    parent template is added and the parent's parameters are carried over,
    except ``id`` and any whose variable name the class already uses for one
    of its own properties (those are passed as ``None``).

    :param properties_by_domain: Property rows (dicts with ``property``,
        ``property_type`` and ``range`` keys) grouped by domain class IRI.
    :param properties_by_range: The same kind of rows grouped by range class IRI.
    :param class_ordering: Class IRIs in the order templates are to be built;
        a parent is only inherited from if it comes earlier in this sequence.
    :param subclass_of: Maps a class IRI to the list of its direct parent IRIs.
    :return: A dictionary from class IRI to its ``_notype`` template.
    """
    templates = {}
    subj = get_subj_var()

    for c in class_ordering:
        # NOTE(review): RDFType.IRI is passed uncalled here and below, while the
        # package README writes RDFType.IRI() - confirm both forms are accepted.
        parameters = [Parameter(variable=subj, optional=False, rdf_type=RDFType.IRI)]
        instances = []
        existing_preds = set()
        existing_varnames = set()

        # Properties with this class as domain: the class instance is the subject.
        for p in properties_by_domain.get(c, ()):
            if p["property"] in existing_preds:
                continue
            existing_preds.add(p["property"])
            v = uri_to_variable(p["property"])
            existing_varnames.add(v.name)
            if p["property_type"] == "http://www.w3.org/2002/07/owl#ObjectProperty":
                t = RDFType.IRI
            elif p["range"]:
                t = RDFType.Literal(p["range"])
            else:
                t = None
            parameters.append(Parameter(variable=v, optional=True, rdf_type=t))
            instances.append(Triple(subj, IRI(p["property"]), v))

        # Properties with this class as range: the class instance is the object.
        # A property already handled above is skipped here.
        for p in properties_by_range.get(c, ()):
            if p["property"] in existing_preds:
                continue
            existing_preds.add(p["property"])
            v = uri_to_variable(p["property"])
            existing_varnames.add(v.name)
            parameters.append(Parameter(variable=v, optional=True, rdf_type=RDFType.IRI))
            instances.append(Triple(v, IRI(p["property"]), subj))

        # Inherit from every direct parent that already has a template.
        # NOTE(review): inherited variable names are not added to
        # existing_varnames, so two parents exposing the same variable would
        # each contribute that parameter - confirm whether this can occur.
        for sc in subclass_of.get(c, ()):
            if sc not in templates:
                continue
            sct = templates[sc]
            variables = []
            for p in sct.parameters:
                if p.variable.name not in existing_varnames:
                    variables.append(p.variable)
                    # The subject parameter is already first in `parameters`.
                    if p.variable.name != "id":
                        parameters.append(p)
                else:
                    # Name clashes with one of this class's own properties.
                    variables.append(None)
            instances.append(sct.instance(variables))

        templates[c] = Template(
            IRI(c + "_notype"), parameters=parameters, instances=instances
        )
    return templates
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def generate_templates_with_typing(
    templates: Dict[str, Template],
) -> Dict[str, Template]:
    """Create, for each untyped template, a variant that also asserts the rdf:type.

    The new template has the dictionary key as its IRI, the same parameters
    as the input template, and the input template's instances followed by the
    triple ``id rdf:type <key>``.

    :param templates: Templates keyed by class IRI.
    :return: A new dictionary with the same keys, holding the typed templates.
    """
    subj = get_subj_var()
    rdf_type = IRI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
    with_type_templates = {}
    for class_iri, template in templates.items():
        # Build a fresh list instead of appending to template.instances, so the
        # input template cannot pick up the type triple through a shared list.
        instances = [*template.instances, Triple(subj, rdf_type, IRI(class_iri))]
        with_type_templates[class_iri] = Template(
            iri=IRI(class_iri), parameters=template.parameters, instances=instances
        )
    return with_type_templates
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def get_subj_var() -> Variable:
    """Return a new Variable named ``id``."""
    subject_name = "id"
    return Variable(subject_name)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import polars as pl
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def topological_sort(subclasses: pl.DataFrame) -> List[str]:
    """Order classes so that every parent comes before all of its children.

    Only classes that occur in ``subclasses`` (as child or as parent) are
    part of the result.

    :param subclasses: One row per subclass relation, with the columns
        ``child`` and ``parent``.
    :return: The classes in an order where parents precede their children.
    :raises AssertionError: If the relation contains a cycle, so that no such
        ordering exists.
    """
    # Number of parents each child still waits for.
    in_degree = {
        row["child"]: row["parent"]
        for row in subclasses.group_by("child")
        .agg([pl.col("parent").count()])
        .iter_rows(named=True)
    }
    # Direct children of every parent.
    children_of = {
        row["parent"]: row["child"]
        for row in subclasses.group_by("parent")
        .agg([pl.col("child")])
        .iter_rows(named=True)
    }

    # Start from the parents that are never a child themselves.
    ready = [parent for parent in children_of if parent not in in_degree]
    sorting = []
    while ready:
        current = ready.pop()
        sorting.append(current)
        for child in children_of.get(current, ()):
            in_degree[child] -= 1
            if in_degree[child] == 0:
                ready.append(child)

    # Anything still waiting for a parent can only be part of a cycle.
    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    unresolved = [cls for cls, remaining in in_degree.items() if remaining > 0]
    if unresolved:
        raise AssertionError(
            f"Cannot order classes, the child/parent relation contains a cycle "
            f"involving: {unresolved}"
        )
    return sorting
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: maplib
|
|
3
|
+
Version: 0.19.3
|
|
4
|
+
Classifier: Development Status :: 4 - Beta
|
|
5
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
6
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
7
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
10
|
+
Classifier: Programming Language :: Rust
|
|
11
|
+
Classifier: Topic :: Database :: Database Engines/Servers
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering
|
|
13
|
+
Requires-Dist: polars>=0.20.13
|
|
14
|
+
Requires-Dist: pyarrow>=7.0.0
|
|
15
|
+
Requires-Dist: fastapi[standard]>=0.115
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Summary: Dataframe-based interactive knowledge graph construction
|
|
18
|
+
Keywords: rdf,graph,dataframe,sparql,ottr
|
|
19
|
+
Author-email: Magnus Bakken <magnus@data-treehouse.com>
|
|
20
|
+
Requires-Python: >=3.9
|
|
21
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
22
|
+
Project-URL: Homepage, https://github.com/DataTreehouse/maplib
|
|
23
|
+
Project-URL: Documentation, https://datatreehouse.github.io/maplib/maplib/maplib.html
|
|
24
|
+
Project-URL: Repository, https://github.com/DataTreehouse/maplib
|
|
25
|
+
Project-URL: Changelog, https://github.com/DataTreehouse/maplib/releases
|
|
26
|
+
|
|
27
|
+
## maplib: High-performance RDF knowledge graph construction, SHACL validation and SPARQL-based enrichment in Python
|
|
28
|
+
maplib is a knowledge graph construction library for building RDF knowledge graphs using template expansion ([OTTR](https://ottr.xyz/) Templates). Maplib features SPARQL- and SHACL-engines that are available as the graph is being constructed, allowing enrichment and validation. It can construct and validate knowledge graphs with millions of nodes in seconds.
|
|
29
|
+
|
|
30
|
+
maplib allows you to leverage your existing skills with Pandas or Polars to extract and wrangle data from existing databases and spreadsheets, before applying simple templates to them to build a knowledge graph.
|
|
31
|
+
|
|
32
|
+
Template expansion is typically zero-copy and nearly instantaneous, and the built-in SPARQL and SHACL engines mean you can query, inspect, enrich and validate the knowledge graph immediately.
|
|
33
|
+
|
|
34
|
+
maplib is written in Rust, it is built on [Apache Arrow](https://arrow.apache.org/) using [Pola.rs](https://www.pola.rs/) and uses libraries from [Oxigraph](https://github.com/oxigraph/oxigraph) for handling linked data as well as parsing SPARQL queries.
|
|
35
|
+
|
|
36
|
+
## Installing
|
|
37
|
+
The package is published on [PyPI](https://pypi.org/project/maplib/) and the API is documented [here](https://datatreehouse.github.io/maplib/maplib.html):
|
|
38
|
+
```shell
|
|
39
|
+
pip install maplib
|
|
40
|
+
```
|
|
41
|
+
Please send us a message, e.g. on LinkedIn (search for Data Treehouse) or on our [webpage](https://www.data-treehouse.com/contact-8) if you want to try out SHACL.
|
|
42
|
+
|
|
43
|
+
## Model
|
|
44
|
+
We can easily map DataFrames to RDF-graphs using the Python library. Below is a reproduction of the example in the paper [1]. Assume that we have a DataFrame given by:
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
import polars as pl
|
|
48
|
+
pl.Config.set_fmt_str_lengths(150)
|
|
49
|
+
|
|
50
|
+
pi = "https://github.com/DataTreehouse/maplib/pizza#"
|
|
51
|
+
df = pl.DataFrame({
|
|
52
|
+
"p":[pi + "Hawaiian", pi + "Grandiosa"],
|
|
53
|
+
"c":[pi + "CAN", pi + "NOR"],
|
|
54
|
+
"ings": [[pi + "Pineapple", pi + "Ham"],
|
|
55
|
+
[pi + "Pepper", pi + "Meat"]]
|
|
56
|
+
})
|
|
57
|
+
print(df)
|
|
58
|
+
```
|
|
59
|
+
That is, our DataFrame is:
|
|
60
|
+
|
|
61
|
+
| p | c | ings |
|
|
62
|
+
|-------------------------------|--------------------------------|------------------------------------------|
|
|
63
|
+
| str | str | list[str] |
|
|
64
|
+
| "https://.../pizza#Hawaiian" | "https://.../maplib/pizza#CAN" | [".../pizza#Pineapple", ".../pizza#Ham"] |
|
|
65
|
+
| "https://.../pizza#Grandiosa" | "https://.../maplib/pizza#NOR" | [".../pizza#Pepper", ".../pizza#Meat"] |
|
|
66
|
+
|
|
67
|
+
Then we can define an OTTR template, and create our knowledge graph by expanding this template with our DataFrame as input:
|
|
68
|
+
```python
|
|
69
|
+
from maplib import Model, Prefix, Template, Argument, Parameter, Variable, RDFType, Triple, a
|
|
70
|
+
pi = Prefix(pi)
|
|
71
|
+
|
|
72
|
+
p_var = Variable("p")
|
|
73
|
+
c_var = Variable("c")
|
|
74
|
+
ings_var = Variable("ings")
|
|
75
|
+
|
|
76
|
+
template = Template(
|
|
77
|
+
iri= pi.suf("PizzaTemplate"),
|
|
78
|
+
parameters= [
|
|
79
|
+
Parameter(variable=p_var, rdf_type=RDFType.IRI()),
|
|
80
|
+
Parameter(variable=c_var, rdf_type=RDFType.IRI()),
|
|
81
|
+
Parameter(variable=ings_var, rdf_type=RDFType.Nested(RDFType.IRI()))
|
|
82
|
+
],
|
|
83
|
+
instances= [
|
|
84
|
+
Triple(p_var, a, pi.suf("Pizza")),
|
|
85
|
+
Triple(p_var, pi.suf("fromCountry"), c_var),
|
|
86
|
+
Triple(
|
|
87
|
+
p_var,
|
|
88
|
+
pi.suf("hasIngredient"),
|
|
89
|
+
Argument(term=ings_var, list_expand=True),
|
|
90
|
+
list_expander="cross")
|
|
91
|
+
]
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
m = Model()
|
|
95
|
+
m.map(template, df)
|
|
96
|
+
hpizzas = """
|
|
97
|
+
PREFIX pi:<https://github.com/DataTreehouse/maplib/pizza#>
|
|
98
|
+
CONSTRUCT { ?p a pi:HeterodoxPizza }
|
|
99
|
+
WHERE {
|
|
100
|
+
?p a pi:Pizza .
|
|
101
|
+
?p pi:hasIngredient pi:Pineapple .
|
|
102
|
+
}"""
|
|
103
|
+
m.insert(hpizzas)
|
|
104
|
+
return m
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
We can immediately query the mapped knowledge graph:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
m.query("""
|
|
111
|
+
PREFIX pi:<https://github.com/DataTreehouse/maplib/pizza#>
|
|
112
|
+
SELECT ?p ?i WHERE {
|
|
113
|
+
?p a pi:Pizza .
|
|
114
|
+
?p pi:hasIngredient ?i .
|
|
115
|
+
}
|
|
116
|
+
""")
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
The query gives the following result (a DataFrame):
|
|
120
|
+
|
|
121
|
+
| p | i |
|
|
122
|
+
|---------------------------------|---------------------------------------|
|
|
123
|
+
| str | str |
|
|
124
|
+
| "<https://.../pizza#Grandiosa>" | "<https://.../pizza#Meat>" |
|
|
125
|
+
| "<https://.../pizza#Grandiosa>" | "<https://.../pizza#Pepper>" |
|
|
126
|
+
| "<https://.../pizza#Hawaiian>" | "<https://.../pizza#Pineapple>" |
|
|
127
|
+
| "<https://.../pizza#Hawaiian>" | "<https://.../pizza#Ham>" |
|
|
128
|
+
|
|
129
|
+
Next, we are able to perform a construct query, which creates new triples but does not insert them.
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
hpizzas = """
|
|
133
|
+
PREFIX pi:<https://github.com/DataTreehouse/maplib/pizza#>
|
|
134
|
+
CONSTRUCT { ?p a pi:UnorthodoxPizza }
|
|
135
|
+
WHERE {
|
|
136
|
+
?p a pi:Pizza .
|
|
137
|
+
?p pi:hasIngredient pi:Pineapple .
|
|
138
|
+
}"""
|
|
139
|
+
res = m.query(hpizzas)
|
|
140
|
+
res[0]
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
The resulting triples are given below:
|
|
144
|
+
|
|
145
|
+
| subject | verb | object |
|
|
146
|
+
|--------------------------------|--------------------------------------|---------------------------------------|
|
|
147
|
+
| str | str | str |
|
|
148
|
+
| "<https://.../pizza#Hawaiian>" | "<http://.../22-rdf-syntax-ns#type>" | "<https://.../pizza#UnorthodoxPizza>" |
|
|
149
|
+
|
|
150
|
+
If we are happy with the output of this construct-query, we can insert it in the model state. Afterwards we check that the triple is added with a query.
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
m.insert(hpizzas)
|
|
154
|
+
m.query("""
|
|
155
|
+
PREFIX pi:<https://github.com/DataTreehouse/maplib/pizza#>
|
|
156
|
+
|
|
157
|
+
SELECT ?p WHERE {
|
|
158
|
+
?p a pi:UnorthodoxPizza
|
|
159
|
+
}
|
|
160
|
+
""")
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Indeed, we have added the triple:
|
|
164
|
+
|
|
165
|
+
| p |
|
|
166
|
+
|------------------------------------------------------------|
|
|
167
|
+
| str |
|
|
168
|
+
| "<https://github.com/DataTreehouse/maplib/pizza#Hawaiian>" |
|
|
169
|
+
|
|
170
|
+
## API
|
|
171
|
+
The [API](https://datatreehouse.github.io/maplib/maplib.html) is simple, and contains only one class and a few methods for:
|
|
172
|
+
- expanding templates
|
|
173
|
+
- querying with SPARQL
|
|
174
|
+
- validating with SHACL
|
|
175
|
+
- importing triples (Turtle, RDF/XML, NTriples)
|
|
176
|
+
- writing triples (Turtle, RDF/XML, NTriples)
|
|
177
|
+
- creating a new Model object (sprout) based on queries over the current Model object.
|
|
178
|
+
|
|
179
|
+
The API is documented [HERE](https://datatreehouse.github.io/maplib/maplib.html)
|
|
180
|
+
|
|
181
|
+
## Roadmap of features and optimizations
|
|
182
|
+
Spring 2025
|
|
183
|
+
- Datalog reasoning support ✅
|
|
184
|
+
- Reduced memory footprint ✅
|
|
185
|
+
- Further SPARQL optimizations
|
|
186
|
+
- JSON-LD support
|
|
187
|
+
|
|
188
|
+
Fall 2025
|
|
189
|
+
- SHACL rules support
|
|
190
|
+
- Improved TTL serialization (prettier and faster)
|
|
191
|
+
+++
|
|
192
|
+
|
|
193
|
+
The roadmap is subject to change, particularly in response to user and customer requests.
|
|
194
|
+
|
|
195
|
+
## References
|
|
196
|
+
There is an associated paper [1] with associated benchmarks showing superior performance and scalability that can be found [here](https://ieeexplore.ieee.org/document/10106242). OTTR is described in [2].
|
|
197
|
+
|
|
198
|
+
[1] M. Bakken, "maplib: Interactive, literal RDF model mapping for industry," in IEEE Access, doi: 10.1109/ACCESS.2023.3269093.
|
|
199
|
+
|
|
200
|
+
[2] M. G. Skjæveland, D. P. Lupp, L. H. Karlsen, and J. W. Klüwer, “Ottr: Formal templates for pattern-based ontology engineering.” in WOP (Book),
|
|
201
|
+
2021, pp. 349–377.
|
|
202
|
+
|
|
203
|
+
## Licensing
|
|
204
|
+
All code produced since August 1st, 2023 is copyrighted to [Data Treehouse AS](https://www.data-treehouse.com/) with an Apache 2.0 license unless otherwise noted.
|
|
205
|
+
|
|
206
|
+
All code which was produced before August 1st, 2023 is copyrighted to [Prediktor AS](https://www.prediktor.com/) with an Apache 2.0 license unless otherwise noted, and has been financed by [The Research Council of Norway](https://www.forskningsradet.no/en/) (grant no. 316656) and [Prediktor AS](https://www.prediktor.com/) as part of a PhD Degree. The code at this state is archived in the repository at [https://github.com/magbak/maplib](https://github.com/magbak/maplib).
|
|
207
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
maplib-0.19.3.dist-info/METADATA,sha256=mwSXi6090Hcuo4h8zNTWVUyIWKtceG7cJNCIIOFeMNU,9258
|
|
2
|
+
maplib-0.19.3.dist-info/WHEEL,sha256=MF1HCxdpioEZC6lqltB_WBzy4tzMPetoB2hZW2ZWEzg,95
|
|
3
|
+
maplib-0.19.3.dist-info/licenses/LICENSE,sha256=8f_rikNX2RHmVhT1CFq1M2itL6kTpawNjNTHUFCB870,11661
|
|
4
|
+
maplib/.gitignore,sha256=88KgwL2QsVFk7EKzNn65u6Z-5ibwf9RPU6J68KuZotY,6
|
|
5
|
+
maplib/__init__.py,sha256=XolwCUgnC2mTW9Z1YvmHdicsV-Z4r3RHr03KwWflMUE,1789
|
|
6
|
+
maplib/__init__.pyi,sha256=DOgWoWRUtgGt0VZA3Z0NIQ_rmpP8Pj3NEwMvU3yxlN8,34745
|
|
7
|
+
maplib/adding_triples.py,sha256=BSQQQs4-XgpGLax9UXC-GKZ2fu6BidvtgsBlRJJqxek,1084
|
|
8
|
+
maplib/maplib.pyd,sha256=nDXa2taPxl1VCk1zXNl9dCHZdIvS0okGmlrwQY8OS3M,97077760
|
|
9
|
+
maplib/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
maplib/template_generator/__init__.py,sha256=9w78SbwEDZY2moDzpiy0axVxIy5qWouoPqIFfLvKXKQ,42
|
|
11
|
+
maplib/template_generator/generate.py,sha256=TjurFj-RyGziryFi0tcXWIqK6LGaaJRk09oSyvITlVg,8314
|
|
12
|
+
maplib/template_generator/ordering.py,sha256=0yvKst8bp0df8cpiCEVljb0WOcvxD9y_XdOpg8SogtU,1468
|
|
13
|
+
maplib-0.19.3.dist-info/RECORD,,
|