maplib 0.14.5__cp39-none-win_amd64.whl → 0.17.8__cp39-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
maplib/__init__.py CHANGED
@@ -1,8 +1,60 @@
1
- import pathlib
1
+ # r'''
2
+ # # Overview
3
+ #
4
+ # '''
5
+
6
+ __all__ = [
7
+ "Model",
8
+ "a",
9
+ "Triple",
10
+ "SolutionModels",
11
+ "IndexingOptions",
12
+ "ValidationReport",
13
+ "Instance",
14
+ "Template",
15
+ "Argument",
16
+ "Parameter",
17
+ "Variable",
18
+ "RDFType",
19
+ "XSD",
20
+ "IRI",
21
+ "Literal",
22
+ "Prefix",
23
+ "BlankNode",
24
+ "explore",
25
+ "add_triples",
26
+ "MaplibException",
27
+ ]
2
28
 
29
+ import pathlib
3
30
  from .maplib import *
4
- from .add_triples import add_triples
5
- PATH_HERE = pathlib.Path(__file__).parent.resolve()
31
+ from .adding_triples import add_triples
32
+
33
+ if (pathlib.Path(__file__).parent.resolve() / "graph_explorer").exists():
34
+ from .graph_explorer import explore
35
+ else:
36
+
37
+ def explore(
38
+ m: "Model",
39
+ host: str = "localhost",
40
+ port: int = 8000,
41
+ bind: str = "localhost",
42
+ popup=True,
43
+ fts=True,
44
+ ):
45
+ """Starts a graph explorer session.
46
+ To run from Jupyter Notebook use:
47
+ >>> from maplib import explore
48
+ >>>
49
+ >>> server = explore(m)
50
+ You can later stop the server with
51
+ >>> server.stop()
6
52
 
7
- if (PATH_HERE / "graph_explorer").exists():
8
- from .graph_explorer import explore
53
+ :param m: The Model to explore
54
+ :param host: The hostname that we will point the browser to.
55
+ :param port: The port where the graph explorer webserver listens on.
56
+ :param bind: Bind to the following host / ip.
57
+ :param popup: Pop up the browser window.
58
+ :param fts: Enable full text search indexing
59
+ """
60
+ print("Contact Data Treehouse to try!")
@@ -68,16 +68,16 @@ class Prefix:
68
68
  A prefix that can be used to ergonomically build iris.
69
69
  """
70
70
 
71
- def __init__(self, prefix, iri):
71
+ def __init__(self, iri, prefix_name=None):
72
72
  """
73
73
  Create a new prefix.
74
- :param prefix: The name of the prefix
75
74
  :param iri: The prefix IRI.
75
+ :param prefix_name: The name of the prefix
76
76
  """
77
77
 
78
78
  def suf(self, suffix: str) -> IRI:
79
79
  """
80
- Create a IRI by appending the suffix.
80
+ Create an IRI by appending the suffix.
81
81
  :param suffix: The suffix to append.
82
82
  :return:
83
83
  """
@@ -250,15 +250,18 @@ class IndexingOptions:
250
250
  Options for indexing
251
251
  """
252
252
 
253
- def __init__(self, enabled:bool=True,
254
- object_sort_all:bool=None,
255
- object_sort_some:List["IRI"]=None):
253
+ def __init__(
254
+ self,
255
+ object_sort_all: bool = None,
256
+ object_sort_some: List["IRI"] = None,
257
+ fts_path: str = None,
258
+ ):
256
259
  """
257
260
  Defaults to indexing on subjects and objects for select types (e.g. rdf:type and rdfs:label)
258
261
 
259
- :param enabled: Enable indexing (this will enable indexing on subjects)
260
262
  :param object_sort_all: Enable object-indexing for all suitable predicates (doubles memory requirement).
261
263
  :param object_sort_some: Enable object-indexing for a selected list of predicates.
264
+ :param fts_path: Enable full text search, stored at the path
262
265
  """
263
266
 
264
267
  ParametersType = Dict[str, Tuple[DataFrame, Dict[str, RDFType]]]
@@ -270,17 +273,26 @@ class ValidationReport:
270
273
  """
271
274
 
272
275
  conforms: bool
276
+ "Whether or not the validation report conforms to the shapes"
277
+
278
+ shape_targets: DataFrame
279
+ "A DataFrame containing the counts of the targets of each shape and constraint"
280
+
281
+ performance: DataFrame
282
+ "Performance statistics for the validation process"
273
283
 
274
284
  def results(
275
285
  self,
276
286
  native_dataframe: bool = False,
277
287
  include_datatypes: bool = False,
278
- ) -> Optional[Union[DataFrame, SolutionMappings]]:
288
+ streaming: bool = False,
289
+ ) -> Optional[Union[DataFrame, "SolutionMappings"]]:
279
290
  """
280
291
  Return the results of the validation report, if they exist.
281
292
 
282
293
  :param native_dataframe: Return columns with maplib-native formatting. Useful for round-trips.
283
294
  :param include_datatypes: Return datatypes of the results DataFrame (returns SolutionMappings instead of DataFrame).
295
+ :param streaming: Use the Polars streaming functionality.
284
296
  :return: The SHACL validation report, as a DataFrame
285
297
  """
286
298
 
@@ -288,6 +300,7 @@ class ValidationReport:
288
300
  self,
289
301
  native_dataframe: bool = False,
290
302
  include_datatypes: bool = False,
303
+ streaming: bool = False,
291
304
  ) -> Optional[DataFrame]:
292
305
  """
293
306
  Returns the details of the validation report.
@@ -295,20 +308,19 @@ class ValidationReport:
295
308
 
296
309
  :param native_dataframe: Return columns with maplib-native formatting. Useful for round-trips.
297
310
  :param include_datatypes: Return datatypes of the results DataFrame (returns SolutionMappings instead of DataFrame).
311
+ :param streaming: Use the Polars streaming functionality.
298
312
  :return: Details of the SHACL validation report, as a DataFrame
299
313
  """
300
314
 
301
- def graph(self, indexing = None) -> "Mapping":
315
+ def graph(self) -> "Mapping":
302
316
  """
303
317
  Creates a new mapping object where the base graph is the validation report with results.
304
318
  Includes the details of the validation report in the new graph if they exist.
305
319
 
306
- :param indexing: Should the constructed graph be indexed?
307
- If not specified it is inherited from the mapping where validate was called.
308
320
  :return:
309
321
  """
310
322
 
311
- class Mapping:
323
+ class Model:
312
324
  """
313
325
  A mapping session allowing:
314
326
 
@@ -318,80 +330,99 @@ class Mapping:
318
330
 
319
331
  Usage:
320
332
 
321
- >>> from maplib import Mapping
333
+ >>> from maplib import Model
322
334
  ... doc = '''
323
335
  ... :prefix ex:<http://example.net/ns#>.
324
336
  ... ex:ExampleTemplate [?MyValue] :: {
325
337
  ... ottr:Triple(ex:myObject, ex:hasValue, ?MyValue)
326
338
  ... } .'''
327
- ... m = Mapping(doc)
339
+ ... m = Model()
340
+ ... m.add_template(doc)
328
341
 
329
342
  :param documents: a stOTTR document or a list of these
330
343
  :param indexing_options: options for indexing
331
344
  """
332
345
 
333
346
  def __init__(
334
- self, documents: Union[str, List[str]] = None, indexing_options: "IndexingOptions" = None
335
- ) -> "Mapping": ...
336
- def add_template(self, template: "Template"):
347
+ self,
348
+ indexing_options: "IndexingOptions" = None,
349
+ ) -> "Model": ...
350
+ def add_template(self, template: Union["Template", str]):
337
351
  """
338
- Add a template to the mapping. Overwrites any existing template with the same IRI.
339
- :param template: The template to add.
352
+ Add a template to the model. Overwrites any existing template with the same IRI.
353
+ :param template: The template to add, as a stOTTR string or as a programmatically constructed Template.
340
354
  :return:
341
355
  """
342
356
 
343
- def expand(
357
+ def map(
344
358
  self,
345
359
  template: Union[str, "Template", IRI],
346
360
  df: DataFrame = None,
347
- unique_subset: List[str] = None,
348
361
  graph: str = None,
349
362
  types: Dict[str, RDFType] = None,
350
363
  validate_iris: bool = True,
351
- validate_unique_subset: bool = False,
352
364
  ) -> None:
353
365
  """
354
- Expand a template using a DataFrame
366
+ Map a template using a DataFrame
355
367
  Usage:
356
368
 
357
- >>> m.expand("ex:ExampleTemplate", df)
358
- ... m.expand("ex:ExampleTemplate", df, unique_subsets=["MyValue"])
369
+ >>> m.map("ex:ExampleTemplate", df)
359
370
 
360
371
  If the template has no arguments, the df argument is not necessary.
361
372
 
362
373
  :param template: Template, IRI, IRI string or prefixed template name.
363
374
  :param df: DataFrame where the columns have the same names as the template arguments
364
- :param unique_subset: DataFrame column names known to be unique e.g. ["colA", "colB"], for a performance boost (reduce costly deduplication)
365
375
  :param graph: The IRI of the graph to add triples to.
376
+ :param types: The types of the columns.
366
377
  :param validate_iris: Validate any IRI-columns.
367
- :param validate_unique_subset: Check that provided unique subset actually is unique.
378
+ """
379
+
380
+ def map_triples(
381
+ self,
382
+ df: DataFrame = None,
383
+ predicate: str = None,
384
+ graph: str = None,
385
+ types: Dict[str, RDFType] = None,
386
+ validate_iris: bool = True,
387
+ ) -> None:
388
+ """
389
+ Map triples from a DataFrame with columns subject, object and predicate.
390
+ The predicate column can also be supplied as a string if it is the same for all rows.
391
+ Usage:
392
+
393
+ >>> m.map_triples(df)
394
+
395
+ If the template has no arguments, the df argument is not necessary.
396
+
397
+ :param df: DataFrame where the columns are named subject and object. May also contain a verb-column.
398
+ :param predicate: The IRI of the predicate, supplied as a string when it is the same for all rows.
399
+ :param graph: The IRI of the graph to add triples to.
368
400
  :param types: The types of the columns.
401
+ :param validate_iris: Validate any IRI-columns.
369
402
  """
370
403
 
371
- def expand_default(
404
+ def map_default(
372
405
  self,
373
406
  df: DataFrame,
374
407
  primary_key_column: str,
375
- template_prefix: str = None,
376
- predicate_uri_prefix: str = None,
408
+ dry_run: bool = False,
377
409
  graph: str = None,
410
+ types: Dict[str, RDFType] = None,
378
411
  validate_iris: bool = True,
379
- validate_unique_subset: bool = False,
380
412
  ) -> str:
381
413
  """
382
- Create a default template and expand it based on a dataframe.
414
+ Create a default template and map it based on a dataframe.
383
415
  Usage:
384
416
 
385
- >>> template_string = m.expand_default(df, "myKeyCol")
417
+ >>> template_string = m.map_default(df, "myKeyCol")
386
418
  ... print(template_string)
387
419
 
388
420
  :param df: DataFrame where the columns have the same names as the template arguments
389
421
  :param primary_key_column: This column will be the subject of all triples in the generated template.
390
- :param template_prefix: Prefix of the template - the name is auto-generated.
391
- :param predicate_uri_prefix: Prefix of the predicates/verbs in the generated template, names are derived from column names.
422
+ :param dry_run: Do not map the template, only return the string.
392
423
  :param graph: The IRI of the graph to add triples to.
424
+ :param types: The types of the columns.
393
425
  :param validate_iris: Validate any IRI-columns.
394
- :param validate_unique_subset: Check that provided unique subset actually is unique.
395
426
  :return: The generated template
396
427
  """
397
428
 
@@ -404,6 +435,7 @@ class Mapping:
404
435
  graph: str = None,
405
436
  streaming: bool = False,
406
437
  return_json: bool = False,
438
+ include_transient: bool = True,
407
439
  ) -> Union[
408
440
  DataFrame, SolutionMappings, List[Union[DataFrame, SolutionMappings, str]], None
409
441
  ]:
@@ -412,7 +444,7 @@ class Mapping:
412
444
  Currently, SELECT, CONSTRUCT and INSERT are supported.
413
445
  Usage:
414
446
 
415
- >>> df = mapping.query('''
447
+ >>> df = model.query('''
416
448
  ... PREFIX ex:<http://example.net/ns#>
417
449
  ... SELECT ?obj1 ?obj2 WHERE {
418
450
  ... ?obj1 ex:hasObj ?obj2
@@ -426,25 +458,54 @@ class Mapping:
426
458
  :param graph: The IRI of the graph to query.
427
459
  :param streaming: Use Polars streaming
428
460
  :param return_json: Return JSON string.
461
+ :param include_transient: Include transient triples when querying.
429
462
  :return: DataFrame (Select), list of DataFrames (Construct) containing results, or None for Insert-queries
430
463
 
431
464
  """
432
465
 
466
+ def update(
467
+ self,
468
+ update: str,
469
+ parameters: ParametersType = None,
470
+ streaming: bool = False,
471
+ include_transient: bool = True,
472
+ ):
473
+ """
474
+ Insert the results of a Construct query in the graph.
475
+ Useful for being able to use the same query for inspecting what will be inserted and actually inserting.
476
+ Usage:
477
+
478
+ >>> m = Model(doc)
479
+ ... # Omitted
480
+ ... update_pizzas = '''
481
+ ... ...'''
482
+ ... m.update(update_pizzas)
483
+
484
+ :param update: The SPARQL Update string
485
+ :param parameters: PVALUES Parameters, a DataFrame containing the value bindings in the custom PVALUES construction.
486
+ :param streaming: Use Polars streaming
487
+ :param include_transient: Include transient triples when querying (but see "transient" above).
488
+ :return: None
489
+ """
490
+
433
491
  def insert(
434
492
  self,
435
493
  query: str,
436
494
  parameters: ParametersType = None,
495
+ include_datatypes: bool = False,
496
+ native_dataframe: bool = False,
437
497
  transient: bool = False,
438
498
  streaming: bool = False,
439
499
  source_graph: str = None,
440
500
  target_graph: str = None,
501
+ include_transient: bool = True,
441
502
  ):
442
503
  """
443
504
  Insert the results of a Construct query in the graph.
444
505
  Useful for being able to use the same query for inspecting what will be inserted and actually inserting.
445
506
  Usage:
446
507
 
447
- >>> m = Mapping(doc)
508
+ >>> m = Model(doc)
448
509
  ... # Omitted
449
510
  ... hpizzas = '''
450
511
  ... PREFIX pizza:<https://github.com/magbak/maplib/pizza#>
@@ -458,10 +519,13 @@ class Mapping:
458
519
 
459
520
  :param query: The SPARQL Insert query string
460
521
  :param parameters: PVALUES Parameters, a DataFrame containing the value bindings in the custom PVALUES construction.
461
- :param transient: Should the inserted triples be included in exports?
522
+ :param native_dataframe: Return columns with maplib-native formatting. Useful for round-trips.
523
+ :param include_datatypes: Datatypes are not returned by default, set to true to return a dict with the solution mappings and the datatypes.
524
+ :param transient: Should the inserted triples be transient?
462
525
  :param source_graph: The IRI of the source graph to execute the construct query.
463
526
  :param target_graph: The IRI of the target graph to insert into.
464
527
  :param streaming: Use Polars streaming
528
+ :param include_transient: Include transient triples when querying (but see "transient" above).
465
529
  :return: None
466
530
  """
467
531
 
@@ -472,6 +536,10 @@ class Mapping:
472
536
  include_conforms: bool = False,
473
537
  include_shape_graph: bool = True,
474
538
  streaming: bool = False,
539
+ max_shape_constraint_results: int = None,
540
+ only_shapes: List[str] = None,
541
+ deactivate_shapes: List[str] = None,
542
+ dry_run: bool = False,
475
543
  ) -> ValidationReport:
476
544
  """
477
545
  Validate the contained knowledge graph using SHACL
@@ -483,18 +551,21 @@ class Mapping:
483
551
  :param include_shape_graph: Include the shape graph in the report, useful when creating the graph from the report.
484
552
  :param include_datatypes: Return the datatypes of the validation report (and details).
485
553
  :param streaming: Use Polars streaming
554
+ :param max_shape_constraint_results: Maximum number of results per shape and constraint. Reduces the size of the result set.
555
+ :param only_shapes: Validate only these shapes, None means all shapes are validated (must be IRI, cannot be used with deactivate_shapes).
556
+ :param deactivate_shapes: Disable validation of these shapes (must be IRI, cannot be used with only_shapes).
557
+ :param dry_run: Only find targets of shapes, but do not validate them.
486
558
  :return: Validation report containing a report (report.df) and whether the graph conforms (report.conforms)
487
559
  """
488
560
 
489
- def read_triples(
561
+ def read(
490
562
  self,
491
563
  file_path: Union[str, Path],
492
564
  format: LiteralType["ntriples", "turtle", "rdf/xml", "xml", "rdfxml"] = None,
493
565
  base_iri: str = None,
494
566
  transient: bool = False,
495
- parallel: bool = False,
567
+ parallel: bool = None,
496
568
  checked: bool = True,
497
- deduplicate: bool = True,
498
569
  graph: str = None,
499
570
  replace_graph: bool = False,
500
571
  ) -> None:
@@ -506,28 +577,26 @@ class Mapping:
506
577
 
507
578
  Usage:
508
579
 
509
- >>> m.read_triples("my_triples.ttl")
580
+ >>> m.read("my_triples.ttl")
510
581
 
511
582
  :param file_path: The path of the file containing triples
512
583
  :param format: One of "ntriples", "turtle", "rdf/xml", otherwise it is inferred from the file extension.
513
584
  :param base_iri: Base iri
514
585
  :param transient: Should these triples be included when writing the graph to the file system?
515
- :param parallel: Parse triples in parallel, currently only NTRiples. Assumes all prefixes are in the beginning of the document.
586
+ :param parallel: Parse triples in parallel, currently only NTRiples and Turtle. Assumes all prefixes are in the beginning of the document. Defaults to true only for NTriples.
516
587
  :param checked: Check IRIs etc.
517
- :param deduplicate: Set to true by default, disable to increase throughput for large files containing only unique triples.
518
588
  :param graph: The IRI of the graph to read the triples into, if None, it will be the default graph.
519
589
  :param replace_graph: Replace the graph with these triples? Will replace the default graph if no graph is specified.
520
590
  """
521
591
 
522
- def read_triples_string(
592
+ def reads(
523
593
  self,
524
594
  s: str,
525
595
  format: LiteralType["ntriples", "turtle", "rdf/xml", "xml", "rdfxml"],
526
596
  base_iri: str = None,
527
597
  transient: bool = False,
528
- parallel: bool = False,
598
+ parallel: bool = None,
529
599
  checked: bool = True,
530
- deduplicate: bool = True,
531
600
  graph: str = None,
532
601
  replace_graph: bool = False,
533
602
  ) -> None:
@@ -538,66 +607,84 @@ class Mapping:
538
607
 
539
608
  Usage:
540
609
 
541
- >>> m.read_triples(my_ntriples_string, format="ntriples")
610
+ >>> m.reads(my_ntriples_string, format="ntriples")
542
611
 
543
612
  :param s: String containing serialized triples.
544
613
  :param format: One of "ntriples", "turtle", "rdf/xml".
545
614
  :param base_iri: Base iri
546
615
  :param transient: Should these triples be included when writing the graph to the file system?
547
- :param parallel: Parse triples in parallel, currently only NTRiples. Assumes all prefixes are in the beginning of the document.
616
+ :param parallel: Parse triples in parallel, currently only NTriples and Turtle. Assumes all prefixes are in the beginning of the document. Defaults to true for NTriples.
548
617
  :param checked: Check IRIs etc.
549
- :param deduplicate: Set to true by default, disable to increase throughput for large files containing only unique triples.
550
618
  :param graph: The IRI of the graph to read the triples into.
551
619
  :param replace_graph: Replace the graph with these triples? Will replace the default graph if no graph is specified.
552
620
  """
553
621
 
554
- def write_ntriples(self, file_path: Union[str, Path], graph: str = None) -> None:
622
+ def write_cim_xml(
623
+ self,
624
+ file_path: Union[str, Path],
625
+ profile_graph: str,
626
+ model_iri: str = None,
627
+ version: str = None,
628
+ description: str = None,
629
+ created: str = None,
630
+ scenario_time: str = None,
631
+ modeling_authority_set: str = None,
632
+ prefixes: Dict[str, str] = None,
633
+ graph: str = None,
634
+ ) -> None:
555
635
  """
556
- DEPRECATED: use write_triples with format="ntriples"
557
- Write the non-transient triples to the file path specified in the NTriples format.
558
-
559
- Usage:
560
-
561
- >>> m.write_ntriples("my_triples.nt")
636
+ Write the legacy CIM XML format.
637
+
638
+ >>> PROFILE_GRAPH = "urn:graph:profiles"
639
+ >>> m = Model()
640
+ >>> m.read(model_path, base_iri=publicID, format="rdf/xml")
641
+ >>> m.read("61970-600-2_Equipment-AP-Voc-RDFS2020_v3-0-0.rdf", graph=PROFILE_GRAPH, format="rdf/xml")
642
+ >>> m.read("61970-600-2_Operation-AP-Voc-RDFS2020_v3-0-0.rdf", graph=PROFILE_GRAPH, format="rdf/xml")
643
+ >>> m.write_cim_xml(
644
+ >>> "model.xml",
645
+ >>> profile_graph=PROFILE_GRAPH,
646
+ >>> description = "MyModel",
647
+ >>> created = "2023-09-14T20:27:41",
648
+ >>> scenario_time = "2023-09-14T02:44:43",
649
+ >>> modeling_authority_set="www.westernpower.co.uk",
650
+ >>> version="22",
651
+ >>> )
562
652
 
563
653
  :param file_path: The path of the file containing triples
564
- :param graph: The IRI of the graph to write.
565
- """
566
-
567
- def write_triples(self,
568
- file_path: Union[str, Path],
569
- format=LiteralType["ntriples", "turtle", "rdf/xml"],
570
- graph: str = None,
571
- ) -> None:
654
+ :param profile_graph: The IRI of the graph containing the ontology of the CIM profile to write.
655
+ :param model_iri: model_iri a md:FullModel. Is generated if not provided.
656
+ :param version: model_iri md:Model.version version .
657
+ :param description: model_iri md:Model.description description .
658
+ :param created: model_iri md:Model.created created .
659
+ :param scenario_time: model_iri md:Model.scenarioTime scenario_time .
660
+ :param modeling_authority_set: model_iri md:Model.modelingAuthoritySet modeling_authority_set .
661
+ :param prefixes: Prefixes to be used in XML export.
662
+ :param graph: The graph to write, defaults to the default graph.
663
+ """
664
+
665
+ def write(
666
+ self,
667
+ file_path: Union[str, Path],
668
+ format=LiteralType["ntriples", "turtle", "rdf/xml"],
669
+ graph: str = None,
670
+ ) -> None:
572
671
  """
573
672
  Write the non-transient triples to the file path specified, serialized in the given format.
574
673
 
575
674
  Usage:
576
675
 
577
- >>> m.write_triples("my_triples.nt", format="ntriples")
676
+ >>> m.write("my_triples.nt", format="ntriples")
578
677
 
579
678
  :param file_path: The path of the file containing triples
580
679
  :param format: One of "ntriples", "turtle", "rdf/xml".
581
680
  :param graph: The IRI of the graph to write.
582
681
  """
583
682
 
584
-
585
- def write_ntriples_string(self, graph: str = None) -> str:
586
- """
587
- DEPRECATED: use write_triples_string with format="ntriples"
588
- Write the non-transient triples to a string in memory.
589
-
590
- Usage:
591
-
592
- >>> s = m.write_ntriples_string()
593
-
594
- :param graph: The IRI of the graph to write.
595
- :return Triples in mapping in the NTriples format (potentially a large string)
596
- """
597
-
598
- def write_triples_string(self, format=LiteralType["ntriples", "turtle", "rdf/xml"], graph: str = None) -> str:
683
+ def writes(
684
+ self, format=LiteralType["ntriples", "turtle", "rdf/xml"], graph: str = None
685
+ ) -> str:
599
686
  """
600
- DEPRECATED: use write_triples_string with format="ntriples"
687
+ DEPRECATED: use writes with format="ntriples"
601
688
  Write the non-transient triples to a string in memory.
602
689
 
603
690
  Usage:
@@ -606,7 +693,7 @@ class Mapping:
606
693
 
607
694
  :param format: One of "ntriples", "turtle", "rdf/xml".
608
695
  :param graph: The IRI of the graph to write.
609
- :return Triples in mapping in the NTriples format (potentially a large string)
696
+ :return: Triples in the model, serialized in the given format (potentially a large string)
610
697
  """
611
698
 
612
699
  def write_native_parquet(
@@ -626,7 +713,7 @@ class Mapping:
626
713
  def create_sprout(self):
627
714
  """
628
715
  A sprout is a simplified way of dealing with multiple graphs.
629
- See also `maplib.maplib.Mapping.insert_sprout` and `maplib.maplib.Mapping.detach_sprout`
716
+ See also `Model.insert_sprout` and `Model.detach_sprout`
630
717
 
631
718
  :return:
632
719
  """
@@ -635,20 +722,24 @@ class Mapping:
635
722
  self,
636
723
  query: str,
637
724
  parameters: ParametersType = None,
725
+ include_datatypes: bool = False,
726
+ native_dataframe: bool = False,
638
727
  transient: bool = False,
639
728
  streaming: bool = False,
640
729
  source_graph: str = None,
641
730
  target_graph: str = None,
731
+ include_transient: bool = True,
642
732
  ):
643
733
  """
644
734
  Insert the results of a Construct query in a sprouted graph, which is created if no sprout is active.
645
735
  Sprouts are a simplified way of dealing with multiple graphs.
646
736
  Useful for being able to use the same query for inspecting what will be inserted and actually inserting.
647
- See also `maplib.maplib.Mapping.detach_sprout`
737
+ See also `Model.detach_sprout`
648
738
 
649
739
  Usage:
650
740
 
651
- >>> m = Mapping(doc)
741
+ >>> m = Model()
742
+ ... m.add_template(doc)
652
743
  ... m.create_sprout()
653
744
  ... # Omitted
654
745
  ... hpizzas = '''
@@ -663,28 +754,35 @@ class Mapping:
663
754
 
664
755
  :param query: The SPARQL Insert query string
665
756
  :param parameters: PVALUES Parameters, a DataFrame containing the value bindings in the custom PVALUES construction.
757
+ :param native_dataframe: Return columns with maplib-native formatting. Useful for round-trips.
758
+ :param include_datatypes: Datatypes are not returned by default, set to true to return a dict with the solution mappings and the datatypes.
666
759
  :param transient: Should the inserted triples be included in exports?
667
760
  :param source_graph: The IRI of the source graph to execute the construct query.
668
761
  :param target_graph: The IRI of the target graph to insert into.
669
762
  :param streaming: Use Polars streaming
763
+ :param include_transient: Include transient triples when querying (see also "transient" above).
670
764
  :return: None
671
765
  """
672
766
 
673
- def detach_sprout(self) -> "Mapping":
767
+ def detach_sprout(self) -> "Model":
674
768
  """
675
- Detaches and returns the sprout from the mapping.
769
+ Detaches and returns the sprout from the model.
676
770
 
677
- @return: The sprout as its own Mapping.
771
+ :return: The sprout as its own Model.
678
772
  """
679
773
 
680
- def get_predicate_iris(self, graph: str = None, include_transient:bool=False) -> List["IRI"]:
774
+ def get_predicate_iris(
775
+ self, graph: str = None, include_transient: bool = False
776
+ ) -> List["IRI"]:
681
777
  """
682
778
  :param graph: The graph to get the predicate iris from.
683
779
  :param include_transient: Should we include predicates only between transient triples?
684
780
  :return: The IRIs of the predicates currently in the given graph.
685
781
  """
686
782
 
687
- def get_predicate(self, iri: "IRI", graph: str=None, include_transient:bool=False) -> List["SolutionMappings"]:
783
+ def get_predicate(
784
+ self, iri: "IRI", graph: str = None, include_transient: bool = False
785
+ ) -> List["SolutionMappings"]:
688
786
  """
689
787
  :param iri: The predicate IRI
690
788
  :param graph: The graph to get the predicate from.
@@ -692,7 +790,9 @@ class Mapping:
692
790
  :return: A list of the underlying tables that store a given predicate.
693
791
  """
694
792
 
695
- def create_index(self, options: "IndexingOptions"=None, all:bool=True, graph: str=None):
793
+ def create_index(
794
+ self, options: "IndexingOptions" = None, all: bool = True, graph: str = None
795
+ ):
696
796
  """
697
797
  :param options: Indexing options
698
798
  :param all: Apply to all existing and new graphs
@@ -700,9 +800,18 @@ class Mapping:
700
800
  :return:
701
801
  """
702
802
 
703
- def initialize(self):
803
+ def infer(
804
+ self,
805
+ ruleset: Union[str, List[str]],
806
+ include_datatypes: bool = False,
807
+ native_dataframe: bool = False,
808
+ ) -> Optional[Dict[str, DataFrame]]:
704
809
  """
705
- Deduplicates and builds indices of all triplestores.
706
- Happens automatically on first query or validation.
707
- :return:
810
+ Run the inference rules that are provided
811
+ :param ruleset: The Datalog ruleset, as a string or a list of strings.
812
+ :param native_dataframe: Return columns with maplib-native formatting. Useful for round-trips.
813
+ :param include_datatypes: Datatypes are not returned by default, set to true to return a dict with the solution mappings and the datatypes.
814
+ :return: The inferred N-Tuples.
708
815
  """
816
+
817
+ class MaplibException(Exception): ...
@@ -0,0 +1,29 @@
1
+ from maplib.maplib import Model, Template, IRI, Triple, Variable
2
+
3
+
4
+ def add_triples(
5
+ source: Model, target: Model, source_graph: str = None, target_graph: str = None
6
+ ):
7
+ """(Zero) copy the triples from one Model into another.
8
+
9
+ :param source: The source mapping
10
+ :param target: The target mapping
11
+ :param source_graph: The named graph in the source mapping to copy from. None means default graph.
12
+ :param target_graph: The named graph in the target mapping to copy into. None means default graph.
13
+ """
14
+ for p in source.get_predicate_iris(source_graph):
15
+ subject = Variable("subject")
16
+ object = Variable("object")
17
+ template = Template(
18
+ iri=IRI("urn:maplib:tmp"),
19
+ parameters=[subject, object],
20
+ instances=[Triple(subject, p, object)],
21
+ )
22
+ sms = source.get_predicate(p, source_graph)
23
+ for sm in sms:
24
+ target.map(
25
+ template,
26
+ sm.mappings,
27
+ types=sm.rdf_types,
28
+ graph=target_graph,
29
+ )
Binary file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: maplib
3
- Version: 0.14.5
3
+ Version: 0.17.8
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: License :: OSI Approved :: Apache Software License
6
6
  Classifier: Programming Language :: Python :: 3 :: Only
@@ -12,6 +12,8 @@ Classifier: Topic :: Database :: Database Engines/Servers
12
12
  Classifier: Topic :: Scientific/Engineering
13
13
  Requires-Dist: polars >=0.20.13
14
14
  Requires-Dist: pyarrow >=7.0.0
15
+ Requires-Dist: fastapi[standard] >=0.115 ; extra == 'explorer'
16
+ Provides-Extra: explorer
15
17
  License-File: LICENSE
16
18
  Summary: Dataframe-based interactive knowledge graph construction
17
19
  Keywords: rdf,graph,dataframe,sparql,ottr
@@ -33,13 +35,13 @@ Template expansion is typically zero-copy and nearly instantaneous, and the buil
33
35
  maplib is written in Rust, it is built on [Apache Arrow](https://arrow.apache.org/) using [Pola.rs](https://www.pola.rs/) and uses libraries from [Oxigraph](https://github.com/oxigraph/oxigraph) for handling linked data as well as parsing SPARQL queries.
34
36
 
35
37
  ## Installing
36
- The package is published on [PyPi](https://pypi.org/project/maplib/) and the API documented [here](https://datatreehouse.github.io/maplib/maplib/maplib.html):
38
+ The package is published on [PyPI](https://pypi.org/project/maplib/) and the API is documented [here](https://datatreehouse.github.io/maplib/maplib.html):
37
39
  ```shell
38
40
  pip install maplib
39
41
  ```
40
42
  Please send us a message, e.g. on LinkedIn (search for Data Treehouse) or on our [webpage](https://www.data-treehouse.com/contact-8) if you want to try out SHACL.
41
43
 
42
- ## Mapping
44
+ ## Model
43
45
  We can easily map DataFrames to RDF-graphs using the Python library. Below is a reproduction of the example in the paper [1]. Assume that we have a DataFrame given by:
44
46
 
45
47
  ```python
@@ -65,8 +67,8 @@ That is, our DataFrame is:
65
67
 
66
68
  Then we can define an OTTR template, and create our knowledge graph by expanding this template with our DataFrame as input:
67
69
  ```python
68
- from maplib import Mapping, Prefix, Template, Argument, Parameter, Variable, RDFType, Triple, a
69
- pi = Prefix("pi", pi)
70
+ from maplib import Model, Prefix, Template, Argument, Parameter, Variable, RDFType, Triple, a
71
+ pi = Prefix(pi)
70
72
 
71
73
  p_var = Variable("p")
72
74
  c_var = Variable("c")
@@ -90,8 +92,8 @@ template = Template(
90
92
  ]
91
93
  )
92
94
 
93
- m = Mapping()
94
- m.expand(template, df)
95
+ m = Model()
96
+ m.map(template, df)
95
97
  hpizzas = """
96
98
  PREFIX pi:<https://github.com/DataTreehouse/maplib/pizza#>
97
99
  CONSTRUCT { ?p a pi:HeterodoxPizza }
@@ -146,7 +148,7 @@ The resulting triples are given below:
146
148
  | str | str | str |
147
149
  | "<https://.../pizza#Hawaiian>" | "<http://.../22-rdf-syntax-ns#type>" | "<https://.../pizza#UnorthodoxPizza>" |
148
150
 
149
- If we are happy with the output of this construct-query, we can insert it in the mapping state. Afterwards we check that the triple is added with a query.
151
+ If we are happy with the output of this construct-query, we can insert it in the model state. Afterwards we check that the triple is added with a query.
150
152
 
151
153
  ```python
152
154
  m.insert(hpizzas)
@@ -167,20 +169,34 @@ Indeed, we have added the triple:
167
169
  | "<https://github.com/DataTreehouse/maplib/pizza#Hawaiian>" |
168
170
 
169
171
  ## API
170
- The [API](https://datatreehouse.github.io/maplib/maplib/maplib.html) is simple, and contains only one class and a few methods for:
172
+ The [API](https://datatreehouse.github.io/maplib/maplib.html) is simple, and contains only one class and a few methods for:
171
173
  - expanding templates
172
174
  - querying with SPARQL
173
- - validating SHACL
175
+ - validating with SHACL
174
176
  - importing triples (Turtle, RDF/XML, NTriples)
175
- - writing triples (NTriples)
176
- - creating a new Mapping object (sprout) based on queries over the current Mapping object.
177
+ - writing triples (Turtle, RDF/XML, NTriples)
178
+ - creating a new Model object (sprout) based on queries over the current Model object.
177
179
 
178
- The API is documented [HERE](https://datatreehouse.github.io/maplib/maplib/maplib.html)
180
+ The API is documented [HERE](https://datatreehouse.github.io/maplib/maplib.html)
181
+
182
+ ## Roadmap of features and optimizations
183
+ Spring 2025
184
+ - Datalog reasoning support ✅
185
+ - Reduced memory footprint ✅
186
+ - Further SPARQL optimizations
187
+ - JSON-LD support
188
+
189
+ Fall 2025
190
+ - SHACL rules support
191
+ - Improved TTL serialization (prettier and faster)
192
+ +++
193
+
194
+ The roadmap is subject to change, particularly in response to user and customer requests.
179
195
 
180
196
  ## References
181
197
  There is an associated paper [1] with associated benchmarks showing superior performance and scalability that can be found [here](https://ieeexplore.ieee.org/document/10106242). OTTR is described in [2].
182
198
 
183
- [1] M. Bakken, "maplib: Interactive, literal RDF model mapping for industry," in IEEE Access, doi: 10.1109/ACCESS.2023.3269093.
199
+ [1] M. Bakken, "maplib: Interactive, literal RDF model mapping for industry," in IEEE Access, doi: 10.1109/ACCESS.2023.3269093.
184
200
 
185
201
  [2] M. G. Skjæveland, D. P. Lupp, L. H. Karlsen, and J. W. Klüwer, “Ottr: Formal templates for pattern-based ontology engineering.” in WOP (Book),
186
202
  2021, pp. 349–377.
@@ -0,0 +1,10 @@
1
+ maplib-0.17.8.dist-info/METADATA,sha256=TNHeKU1e66a_C37fU46zC_xdqiBjJvSWOaW218LkXsI,9310
2
+ maplib-0.17.8.dist-info/WHEEL,sha256=lukeIsDTsE1YVI71QKxojI1jBuBmCbLW3hTRwIrKSOQ,94
3
+ maplib-0.17.8.dist-info/licenses/LICENSE,sha256=8f_rikNX2RHmVhT1CFq1M2itL6kTpawNjNTHUFCB870,11661
4
+ maplib/.gitignore,sha256=88KgwL2QsVFk7EKzNn65u6Z-5ibwf9RPU6J68KuZotY,6
5
+ maplib/adding_triples.py,sha256=5SklWdJaCFAUE22l_Na1jLPx2KKO0oirf3nVAF4sFnI,1092
6
+ maplib/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ maplib/__init__.py,sha256=UlMsxGkMj9pMbN_zEeKdTGBGy2bEU-avX1gvd59Z_N0,1443
8
+ maplib/__init__.pyi,sha256=ys2HScdIO4KO7o4nDfKgDE6CVylnf8WHgvVyO36Wwq8,29685
9
+ maplib/maplib.cp39-win_amd64.pyd,sha256=qonrguGroEnkXToXQtCLAXKBzZKx_qRx-sCvtHxhe4k,92394496
10
+ maplib-0.17.8.dist-info/RECORD,,
maplib/add_triples.py DELETED
@@ -1,21 +0,0 @@
1
- from maplib import Mapping, Template, IRI, Triple, Variable
2
-
3
-
4
- def add_triples(source: Mapping, target: Mapping, source_graph: str = None, target_graph: str = None):
5
- for p in source.get_predicate_iris(source_graph):
6
- subject = Variable("subject")
7
- object = Variable("object")
8
- template = Template(
9
- iri=IRI("urn:maplib:tmp"),
10
- parameters=[subject, object],
11
- instances=[Triple(subject, p, object)]
12
- )
13
- sms = source.get_predicate(p, source_graph)
14
- for sm in sms:
15
- target.expand(
16
- template,
17
- sm.mappings,
18
- unique_subset=["subject", "object"],
19
- types=sm.rdf_types,
20
- graph=target_graph
21
- )
@@ -1,10 +0,0 @@
1
- maplib-0.14.5.dist-info/METADATA,sha256=4-xyP3XBQZF_swfo2GEEqZLY2sJ6LbM7PZ5TtBA98Xk,8903
2
- maplib-0.14.5.dist-info/WHEEL,sha256=lukeIsDTsE1YVI71QKxojI1jBuBmCbLW3hTRwIrKSOQ,94
3
- maplib-0.14.5.dist-info/licenses/LICENSE,sha256=8f_rikNX2RHmVhT1CFq1M2itL6kTpawNjNTHUFCB870,11661
4
- maplib/.gitignore,sha256=88KgwL2QsVFk7EKzNn65u6Z-5ibwf9RPU6J68KuZotY,6
5
- maplib/add_triples.py,sha256=HrDGnAaRoEvbU6ZqaI2qulPY-ABksqHwWAYKrM4k5Qo,780
6
- maplib/maplib.pyi,sha256=lZM8PzGzA9cT2IIevBZtbVZoj12lH2gvA2XW0KCtS9g,25191
7
- maplib/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- maplib/__init__.py,sha256=heYCc1Px-RP_VhclBENh3HWMrgibPd2_ZAuSPmItHII,218
9
- maplib/maplib.cp39-win_amd64.pyd,sha256=7NWRWC5nV2DwLKP9D819YJQxujC50h9O7B08vqHUfxc,73763328
10
- maplib-0.14.5.dist-info/RECORD,,