cognite-neat 0.85.7__py3-none-any.whl → 0.85.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cognite-neat has been flagged as potentially problematic.

@@ -38,7 +38,11 @@ class RelationshipsExtractor(BaseExtractor):
         namespace: Namespace | None = None,
     ):
         return cls(
-            cast(Iterable[Relationship], client.relationships(data_set_external_ids=data_set_external_id)), namespace
+            cast(
+                Iterable[Relationship],
+                client.relationships(data_set_external_ids=data_set_external_id),
+            ),
+            namespace,
         )
 
     @classmethod
@@ -48,26 +52,25 @@ class RelationshipsExtractor(BaseExtractor):
     def extract(self) -> Iterable[Triple]:
         """Extracts an asset with the given asset_id."""
         for relationship in self.relationships:
-            yield from self._relationship2triples(relationship, self.namespace)
+            yield from self._relationship2triples(relationship)
 
-    @classmethod
-    def _relationship2triples(cls, relationship: Relationship, namespace: Namespace) -> list[Triple]:
+    def _relationship2triples(self, relationship: Relationship) -> list[Triple]:
         """Converts an asset to triples."""
 
         if relationship.external_id and relationship.source_external_id and relationship.target_external_id:
             # relationships do not have an internal id, so we generate one
-            id_ = namespace[f"Relationship_{create_sha256_hash(relationship.external_id)}"]
+            id_ = self.namespace[f"Relationship_{create_sha256_hash(relationship.external_id)}"]
 
             # Set rdf type
-            triples: list[Triple] = [(id_, RDF.type, namespace["Relationship"])]
+            triples: list[Triple] = [(id_, RDF.type, self.namespace["Relationship"])]
 
             # Set source and target types
             if source_type := relationship.source_type:
                 triples.append(
                     (
                         id_,
-                        namespace.source_type,
-                        namespace[source_type.title()],
+                        self.namespace.source_type,
+                        self.namespace[source_type.title()],
                     )
                 )
 
@@ -75,19 +78,19 @@ class RelationshipsExtractor(BaseExtractor):
                 triples.append(
                     (
                         id_,
-                        namespace.target_type,
-                        namespace[target_type.title()],
+                        self.namespace.target_type,
+                        self.namespace[target_type.title()],
                     )
                 )
 
             # Create attributes
 
-            triples.append((id_, namespace.external_id, Literal(relationship.external_id)))
+            triples.append((id_, self.namespace.external_id, Literal(relationship.external_id)))
 
             triples.append(
                 (
                     id_,
-                    namespace.source_external_id,
+                    self.namespace.source_external_id,
                     Literal(relationship.source_external_id),
                 )
             )
@@ -95,7 +98,7 @@ class RelationshipsExtractor(BaseExtractor):
             triples.append(
                 (
                     id_,
-                    namespace.target_external_id,
+                    self.namespace.target_external_id,
                     Literal(relationship.target_external_id),
                 )
             )
@@ -104,7 +107,7 @@ class RelationshipsExtractor(BaseExtractor):
                 triples.append(
                     (
                         id_,
-                        namespace.start_time,
+                        self.namespace.start_time,
                         Literal(datetime.fromtimestamp(relationship.start_time / 1000, timezone.utc)),
                     )
                 )
@@ -113,7 +116,7 @@ class RelationshipsExtractor(BaseExtractor):
                 triples.append(
                     (
                         id_,
-                        namespace.end_time,
+                        self.namespace.end_time,
                         Literal(datetime.fromtimestamp(relationship.end_time / 1000, timezone.utc)),
                     )
                 )
@@ -122,7 +125,7 @@ class RelationshipsExtractor(BaseExtractor):
                 triples.append(
                     (
                         id_,
-                        namespace.created_time,
+                        self.namespace.created_time,
                         Literal(datetime.fromtimestamp(relationship.created_time / 1000, timezone.utc)),
                     )
                 )
@@ -131,7 +134,7 @@ class RelationshipsExtractor(BaseExtractor):
                 triples.append(
                     (
                         id_,
-                        namespace.last_updated_time,
+                        self.namespace.last_updated_time,
                         Literal(datetime.fromtimestamp(relationship.last_updated_time / 1000, timezone.utc)),
                     )
                 )
@@ -140,7 +143,7 @@ class RelationshipsExtractor(BaseExtractor):
                 triples.append(
                     (
                         id_,
-                        namespace.confidence,
+                        self.namespace.confidence,
                         Literal(relationship.confidence),
                     )
                 )
@@ -149,11 +152,23 @@ class RelationshipsExtractor(BaseExtractor):
                 for label in relationship.labels:
                     # external_id can create ill-formed URIs, so we create websafe URIs
                     # since labels do not have internal ids, we use the external_id as the id
-                    triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))
+                    triples.append(
+                        (
+                            id_,
+                            self.namespace.label,
+                            self.namespace[f"Label_{quote(label.dump()['externalId'])}"],
+                        )
+                    )
 
             # Create connection
             if relationship.data_set_id:
-                triples.append((id_, namespace.dataset, namespace[f"Dataset_{relationship.data_set_id}"]))
+                triples.append(
+                    (
+                        id_,
+                        self.namespace.dataset,
+                        self.namespace[f"Dataset_{relationship.data_set_id}"],
+                    )
+                )
 
             return triples
         return []
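
The hunks above drop the namespace parameter from `_relationship2triples` and make it an instance method that reads `self.namespace`, so callers pass only the relationship itself. A minimal usage sketch (not part of the diff), assuming `RelationshipsExtractor` is exported from `cognite.neat.graph.extractors` and using made-up `Relationship` values:

# Hypothetical usage sketch, not part of the diff.
from cognite.client.data_classes import Relationship
from rdflib import Namespace

from cognite.neat.graph.extractors import RelationshipsExtractor

relationship = Relationship(  # illustrative values only
    external_id="pump_to_valve",
    source_external_id="pump_1",
    target_external_id="valve_7",
    source_type="asset",
    target_type="asset",
)

extractor = RelationshipsExtractor([relationship], Namespace("http://example.org/"))
for subject, predicate, obj in extractor.extract():  # namespace now comes from the instance
    print(subject, predicate, obj)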
@@ -1,3 +1,4 @@
+import json
 from collections.abc import Iterable
 from datetime import datetime, timezone
 from pathlib import Path
@@ -20,15 +21,19 @@ class SequencesExtractor(BaseExtractor):
     Args:
         sequence (Iterable[Sequence]): An iterable of sequences.
         namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+        unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+            a JSON string.
     """
 
     def __init__(
         self,
         sequence: Iterable[Sequence],
         namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
     ):
         self.namespace = namespace or DEFAULT_NAMESPACE
         self.sequence = sequence
+        self.unpack_metadata = unpack_metadata
 
     @classmethod
     def from_dataset(
@@ -36,50 +41,71 @@ class SequencesExtractor(BaseExtractor):
         client: CogniteClient,
         data_set_external_id: str,
         namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
     ):
-        return cls(cast(Iterable[Sequence], client.sequences(data_set_external_ids=data_set_external_id)), namespace)
+        return cls(
+            cast(
+                Iterable[Sequence],
+                client.sequences(data_set_external_ids=data_set_external_id),
+            ),
+            namespace,
+            unpack_metadata,
+        )
 
     @classmethod
-    def from_file(cls, file_path: str, namespace: Namespace | None = None):
-        return cls(SequenceList.load(Path(file_path).read_text()), namespace)
+    def from_file(
+        cls,
+        file_path: str,
+        namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
+    ):
+        return cls(SequenceList.load(Path(file_path).read_text()), namespace, unpack_metadata)
 
     def extract(self) -> Iterable[Triple]:
         """Extract sequences as triples."""
         for sequence in self.sequence:
-            yield from self._sequence2triples(sequence, self.namespace)
+            yield from self._sequence2triples(sequence)
 
-    @classmethod
-    def _sequence2triples(cls, sequence: Sequence, namespace: Namespace) -> list[Triple]:
-        id_ = namespace[f"Sequence_{sequence.id}"]
+    def _sequence2triples(self, sequence: Sequence) -> list[Triple]:
+        id_ = self.namespace[f"Sequence_{sequence.id}"]
 
         # Set rdf type
-        triples: list[Triple] = [(id_, RDF.type, namespace.Sequence)]
+        triples: list[Triple] = [(id_, RDF.type, self.namespace.Sequence)]
 
         # Create attributes
 
         if sequence.external_id:
-            triples.append((id_, namespace.external_id, Literal(sequence.external_id)))
+            triples.append((id_, self.namespace.external_id, Literal(sequence.external_id)))
 
         if sequence.name:
-            triples.append((id_, namespace.name, Literal(sequence.name)))
+            triples.append((id_, self.namespace.name, Literal(sequence.name)))
 
         if sequence.metadata:
-            for key, value in sequence.metadata.items():
-                if value:
-                    type_aware_value = string_to_ideal_type(value)
-                    try:
-                        triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
-                    except ValidationError:
-                        triples.append((id_, namespace[key], Literal(type_aware_value)))
+            if self.unpack_metadata:
+                for key, value in sequence.metadata.items():
+                    if value:
+                        type_aware_value = string_to_ideal_type(value)
+                        try:
+                            triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
+                        except ValidationError:
+                            triples.append((id_, self.namespace[key], Literal(type_aware_value)))
+            else:
+                triples.append(
+                    (
+                        id_,
+                        self.namespace.metadata,
+                        Literal(json.dumps(sequence.metadata)),
+                    )
+                )
 
         if sequence.description:
-            triples.append((id_, namespace.description, Literal(sequence.description)))
+            triples.append((id_, self.namespace.description, Literal(sequence.description)))
 
         if sequence.created_time:
             triples.append(
                 (
                     id_,
-                    namespace.created_time,
+                    self.namespace.created_time,
                     Literal(datetime.fromtimestamp(sequence.created_time / 1000, timezone.utc)),
                 )
             )
@@ -88,15 +114,27 @@ class SequencesExtractor(BaseExtractor):
             triples.append(
                 (
                     id_,
-                    namespace.last_updated_time,
+                    self.namespace.last_updated_time,
                     Literal(datetime.fromtimestamp(sequence.last_updated_time / 1000, timezone.utc)),
                 )
             )
 
         if sequence.data_set_id:
-            triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{sequence.data_set_id}"]))
+            triples.append(
+                (
+                    id_,
+                    self.namespace.data_set_id,
+                    self.namespace[f"Dataset_{sequence.data_set_id}"],
+                )
+            )
 
         if sequence.asset_id:
-            triples.append((id_, namespace.asset, namespace[f"Asset_{sequence.asset_id}"]))
+            triples.append(
+                (
+                    id_,
+                    self.namespace.asset,
+                    self.namespace[f"Asset_{sequence.asset_id}"],
+                )
+            )
 
         return triples
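
The new `unpack_metadata` flag controls whether sequence metadata is flattened into one triple per key (the previous behaviour, still the default) or serialized with `json.dumps` into a single `metadata` literal. A small sketch of the difference (not part of the diff), assuming `SequencesExtractor` is exported from `cognite.neat.graph.extractors`:

# Hypothetical usage sketch, not part of the diff.
from cognite.client.data_classes import Sequence

from cognite.neat.graph.extractors import SequencesExtractor

seq = Sequence(id=42, external_id="seq_42", metadata={"location": "north", "rating": "10"})

# Default (unpack_metadata=True): one triple per metadata key.
unpacked = list(SequencesExtractor([seq]).extract())

# unpack_metadata=False: the whole dict becomes a single JSON string literal.
packed = list(SequencesExtractor([seq], unpack_metadata=False).extract())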
@@ -1,3 +1,4 @@
+import json
 from collections.abc import Iterable
 from datetime import datetime, timezone
 from pathlib import Path
@@ -20,15 +21,19 @@ class TimeSeriesExtractor(BaseExtractor):
     Args:
         timeseries (Iterable[TimeSeries]): An iterable of timeseries.
         namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+        unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+            a JSON string.
     """
 
     def __init__(
         self,
         timeseries: Iterable[TimeSeries],
         namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
    ):
         self.namespace = namespace or DEFAULT_NAMESPACE
         self.timeseries = timeseries
+        self.unpack_metadata = unpack_metadata
 
     @classmethod
     def from_dataset(
@@ -36,65 +41,84 @@ class TimeSeriesExtractor(BaseExtractor):
         client: CogniteClient,
         data_set_external_id: str,
         namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
     ):
         return cls(
-            cast(Iterable[TimeSeries], client.time_series(data_set_external_ids=data_set_external_id)), namespace
+            cast(
+                Iterable[TimeSeries],
+                client.time_series(data_set_external_ids=data_set_external_id),
+            ),
+            namespace,
+            unpack_metadata,
         )
 
     @classmethod
-    def from_file(cls, file_path: str, namespace: Namespace | None = None):
-        return cls(TimeSeriesList.load(Path(file_path).read_text()), namespace)
+    def from_file(
+        cls,
+        file_path: str,
+        namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
+    ):
+        return cls(TimeSeriesList.load(Path(file_path).read_text()), namespace, unpack_metadata)
 
     def extract(self) -> Iterable[Triple]:
         """Extract timeseries as triples."""
         for timeseries in self.timeseries:
-            yield from self._timeseries2triples(timeseries, self.namespace)
+            yield from self._timeseries2triples(timeseries)
 
-    @classmethod
-    def _timeseries2triples(cls, timeseries: TimeSeries, namespace: Namespace) -> list[Triple]:
-        id_ = namespace[f"TimeSeries_{timeseries.id}"]
+    def _timeseries2triples(self, timeseries: TimeSeries) -> list[Triple]:
+        id_ = self.namespace[f"TimeSeries_{timeseries.id}"]
 
         # Set rdf type
-        triples: list[Triple] = [(id_, RDF.type, namespace.TimeSeries)]
+        triples: list[Triple] = [(id_, RDF.type, self.namespace.TimeSeries)]
 
         # Create attributes
 
         if timeseries.external_id:
-            triples.append((id_, namespace.external_id, Literal(timeseries.external_id)))
+            triples.append((id_, self.namespace.external_id, Literal(timeseries.external_id)))
 
         if timeseries.name:
-            triples.append((id_, namespace.name, Literal(timeseries.name)))
+            triples.append((id_, self.namespace.name, Literal(timeseries.name)))
 
         if timeseries.is_string:
-            triples.append((id_, namespace.is_string, Literal(timeseries.is_string)))
+            triples.append((id_, self.namespace.is_string, Literal(timeseries.is_string)))
 
         if timeseries.metadata:
-            for key, value in timeseries.metadata.items():
-                if value:
-                    type_aware_value = string_to_ideal_type(value)
-                    try:
-                        triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
-                    except ValidationError:
-                        triples.append((id_, namespace[key], Literal(type_aware_value)))
+            if self.unpack_metadata:
+                for key, value in timeseries.metadata.items():
+                    if value:
+                        type_aware_value = string_to_ideal_type(value)
+                        try:
+                            triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
+                        except ValidationError:
+                            triples.append((id_, self.namespace[key], Literal(type_aware_value)))
+            else:
+                triples.append(
+                    (
+                        id_,
+                        self.namespace.metadata,
+                        Literal(json.dumps(timeseries.metadata)),
+                    )
+                )
 
         if timeseries.unit:
-            triples.append((id_, namespace.unit, Literal(timeseries.unit)))
+            triples.append((id_, self.namespace.unit, Literal(timeseries.unit)))
 
-        if namespace.is_step:
-            triples.append((id_, namespace.is_step, Literal(timeseries.is_step)))
+        if self.namespace.is_step:
+            triples.append((id_, self.namespace.is_step, Literal(timeseries.is_step)))
 
         if timeseries.description:
-            triples.append((id_, namespace.description, Literal(timeseries.description)))
+            triples.append((id_, self.namespace.description, Literal(timeseries.description)))
 
         if timeseries.security_categories:
             for category in timeseries.security_categories:
-                triples.append((id_, namespace.security_categories, Literal(category)))
+                triples.append((id_, self.namespace.security_categories, Literal(category)))
 
         if timeseries.created_time:
             triples.append(
                 (
                     id_,
-                    namespace.created_time,
+                    self.namespace.created_time,
                     Literal(datetime.fromtimestamp(timeseries.created_time / 1000, timezone.utc)),
                 )
             )
@@ -103,26 +127,50 @@ class TimeSeriesExtractor(BaseExtractor):
         triples.append(
             (
                 id_,
-                namespace.last_updated_time,
+                self.namespace.last_updated_time,
                 Literal(datetime.fromtimestamp(timeseries.last_updated_time / 1000, timezone.utc)),
             )
         )
 
         if timeseries.legacy_name:
-            triples.append((id_, namespace.legacy_name, Literal(timeseries.legacy_name)))
+            triples.append((id_, self.namespace.legacy_name, Literal(timeseries.legacy_name)))
 
         # Create connections
         if timeseries.unit_external_id:
             # try to create connection to QUDT unit catalog
             try:
-                triples.append((id_, namespace.unit_external_id, URIRef(str(AnyHttpUrl(timeseries.unit_external_id)))))
+                triples.append(
+                    (
+                        id_,
+                        self.namespace.unit_external_id,
+                        URIRef(str(AnyHttpUrl(timeseries.unit_external_id))),
+                    )
+                )
             except ValidationError:
-                triples.append((id_, namespace.unit_external_id, Literal(timeseries.unit_external_id)))
+                triples.append(
+                    (
+                        id_,
+                        self.namespace.unit_external_id,
+                        Literal(timeseries.unit_external_id),
+                    )
+                )
 
         if timeseries.data_set_id:
-            triples.append((id_, namespace.dataset, namespace[f"Dataset_{timeseries.data_set_id}"]))
+            triples.append(
+                (
+                    id_,
+                    self.namespace.dataset,
+                    self.namespace[f"Dataset_{timeseries.data_set_id}"],
+                )
+            )
 
         if timeseries.asset_id:
-            triples.append((id_, namespace.asset, namespace[f"Asset_{timeseries.asset_id}"]))
+            triples.append(
+                (
+                    id_,
+                    self.namespace.asset,
+                    self.namespace[f"Asset_{timeseries.asset_id}"],
+                )
+            )
 
         return triples
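
`TimeSeriesExtractor` gains the same `unpack_metadata` flag, threaded through both factory methods. A sketch of the `from_file` path (not part of the diff), assuming a dumped `TimeSeriesList` at a hypothetical `timeseries.json`:

# Hypothetical usage sketch, not part of the diff.
from cognite.neat.graph.extractors import TimeSeriesExtractor

extractor = TimeSeriesExtractor.from_file("timeseries.json", unpack_metadata=False)
triples = list(extractor.extract())  # metadata arrives as one JSON literal per time series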
@@ -17,7 +17,11 @@ from cognite.neat.rules.models.information import (
     InformationMetadata,
     InformationRulesInput,
 )
-from cognite.neat.utils.utils import get_namespace, remove_namespace_from_uri, uri_to_short_form
+from cognite.neat.utils.utils import (
+    get_namespace,
+    remove_namespace_from_uri,
+    uri_to_short_form,
+)
 
 ORDERED_CLASSES_QUERY = """SELECT ?class (count(?s) as ?instances )
 WHERE { ?s a ?class . }
@@ -25,9 +29,21 @@ ORDERED_CLASSES_QUERY = """SELECT ?class (count(?s) as ?instances )
 
 INSTANCES_OF_CLASS_QUERY = """SELECT ?s WHERE { ?s a <class> . }"""
 
+INSTANCE_PROPERTIES_JSON_DEFINITION = """SELECT ?property (count(?property) as ?occurrence) ?dataType ?objectType
+WHERE {<instance_id> ?property ?value .
+
+       BIND(IF(REGEX(?value, "^\u007b(.*)\u007d$"),
+               <http://www.w3.org/2001/XMLSchema#json>,
+               datatype(?value)) AS ?dataType)
+
+       OPTIONAL {?value rdf:type ?objectType .}}
+GROUP BY ?property ?dataType ?objectType"""
+
 INSTANCE_PROPERTIES_DEFINITION = """SELECT ?property (count(?property) as ?occurrence) ?dataType ?objectType
 WHERE {<instance_id> ?property ?value .
+
        BIND(datatype(?value) AS ?dataType)
+
        OPTIONAL {?value rdf:type ?objectType .}}
 GROUP BY ?property ?dataType ?objectType"""
 
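The new INSTANCE_PROPERTIES_JSON_DEFINITION query differs from the existing one only in its BIND: instead of taking datatype(?value) directly, it first matches the value against a regular expression (\u007b and \u007d are escaped braces) and tags brace-wrapped literals as xsd:json. A rough Python re-statement of that heuristic, for illustration:

# Illustrative re-statement of the SPARQL REGEX, not part of the diff.
import re

json_like = re.compile(r"^\{(.*)\}$")

print(bool(json_like.match('{"location": "north"}')))  # True  -> tagged as xsd:json
print(bool(json_like.match("plain value")))            # False -> falls back to datatype(?value)
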
@@ -36,28 +52,57 @@ class InferenceImporter(BaseImporter):
     """Infers rules from a triple store.
 
     Rules inference through analysis of knowledge graph provided in various formats.
-    Use the factory methods to create an triples store from sources such as
+    Use the factory methods to create a triple store from sources such as
     RDF files, JSON files, YAML files, XML files, or directly from a graph store.
 
     Args:
         issue_list: Issue list to store issues
         graph: Knowledge graph
         max_number_of_instance: Maximum number of instances to be used in inference
+        prefix: Prefix to be used for the inferred model
+        check_for_json_string: Check if values are JSON strings
     """
 
-    def __init__(self, issue_list: IssueList, graph: Graph, max_number_of_instance: int = -1):
+    def __init__(
+        self,
+        issue_list: IssueList,
+        graph: Graph,
+        max_number_of_instance: int = -1,
+        prefix: str = "inferred",
+        check_for_json_string: bool = False,
+    ) -> None:
         self.issue_list = issue_list
         self.graph = graph
         self.max_number_of_instance = max_number_of_instance
+        self.prefix = prefix
+        self.check_for_json_string = check_for_json_string
 
     @classmethod
-    def from_graph_store(cls, store: NeatGraphStore, max_number_of_instance: int = -1):
+    def from_graph_store(
+        cls,
+        store: NeatGraphStore,
+        max_number_of_instance: int = -1,
+        prefix: str = "inferred",
+        check_for_json_string: bool = False,
+    ) -> "InferenceImporter":
         issue_list = IssueList(title="Inferred from graph store")
 
-        return cls(issue_list, store.graph, max_number_of_instance=max_number_of_instance)
+        return cls(
+            issue_list,
+            store.graph,
+            max_number_of_instance=max_number_of_instance,
+            prefix=prefix,
+            check_for_json_string=check_for_json_string,
+        )
 
     @classmethod
-    def from_rdf_file(cls, filepath: Path, max_number_of_instance: int = -1):
+    def from_rdf_file(
+        cls,
+        filepath: Path,
+        max_number_of_instance: int = -1,
+        prefix: str = "inferred",
+        check_for_json_string: bool = False,
+    ) -> "InferenceImporter":
         issue_list = IssueList(title=f"'{filepath.name}'")
 
         graph = Graph()
@@ -66,18 +111,42 @@ class InferenceImporter(BaseImporter):
         except Exception:
             issue_list.append(issues.fileread.FileReadError(filepath))
 
-        return cls(issue_list, graph, max_number_of_instance=max_number_of_instance)
+        return cls(
+            issue_list,
+            graph,
+            max_number_of_instance=max_number_of_instance,
+            prefix=prefix,
+            check_for_json_string=check_for_json_string,
+        )
 
     @classmethod
-    def from_json_file(cls, filepath: Path, max_number_of_instance: int = -1):
+    def from_json_file(
+        cls,
+        filepath: Path,
+        max_number_of_instance: int = -1,
+        prefix: str = "inferred",
+        check_for_json_string: bool = False,
+    ) -> "InferenceImporter":
         raise NotImplementedError("JSON file format is not supported yet.")
 
     @classmethod
-    def from_yaml_file(cls, filepath: Path, max_number_of_instance: int = -1):
+    def from_yaml_file(
+        cls,
+        filepath: Path,
+        max_number_of_instance: int = -1,
+        prefix: str = "inferred",
+        check_for_json_string: bool = False,
+    ) -> "InferenceImporter":
         raise NotImplementedError("YAML file format is not supported yet.")
 
     @classmethod
-    def from_xml_file(cls, filepath: Path, max_number_of_instance: int = -1):
+    def from_xml_file(
+        cls,
+        filepath: Path,
+        max_number_of_instance: int = -1,
+        prefix: str = "inferred",
+        check_for_json_string: bool = False,
+    ) -> "InferenceImporter":
         raise NotImplementedError("JSON file format is not supported yet.")
 
     @overload
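
All of the factory methods now accept `prefix` and `check_for_json_string` and forward them to the constructor. A minimal sketch (not part of the diff), assuming `InferenceImporter` is exported from `cognite.neat.rules.importers`:

# Hypothetical usage sketch, not part of the diff.
from pathlib import Path

from cognite.neat.rules.importers import InferenceImporter

importer = InferenceImporter.from_rdf_file(
    Path("knowledge_graph.ttl"),  # hypothetical file
    max_number_of_instance=100,
    prefix="my_model",            # replaces the previously hard-coded "inferred"
    check_for_json_string=True,   # use the JSON-aware property query
)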
@@ -135,6 +204,7 @@ class InferenceImporter(BaseImporter):
         properties: dict[str, dict] = {}
         prefixes: dict[str, Namespace] = PREFIXES.copy()
 
+        query = INSTANCE_PROPERTIES_JSON_DEFINITION if self.check_for_json_string else INSTANCE_PROPERTIES_DEFINITION
         # Adds default namespace to prefixes
         prefixes[self._default_metadata().prefix] = self._default_metadata().namespace
 
@@ -162,7 +232,7 @@ class InferenceImporter(BaseImporter):
                 + f" LIMIT {self.max_number_of_instance}"
             ):
                 for property_uri, occurrence, data_type_uri, object_type_uri in self.graph.query(  # type: ignore[misc]
-                    INSTANCE_PROPERTIES_DEFINITION.replace("instance_id", instance)
+                    query.replace("instance_id", instance)
                 ):  # type: ignore[misc]
                     property_id = remove_namespace_from_uri(property_uri)
                     self._add_uri_namespace_to_prefixes(cast(URIRef, property_uri), prefixes)
@@ -239,8 +309,7 @@ class InferenceImporter(BaseImporter):
         if Namespace(get_namespace(URI)) not in prefixes.values():
             prefixes[f"prefix-{len(prefixes)+1}"] = Namespace(get_namespace(URI))
 
-    @classmethod
-    def _default_metadata(cls):
+    def _default_metadata(self):
         return InformationMetadata(
             name="Inferred Model",
             creator="NEAT",
@@ -248,7 +317,7 @@ class InferenceImporter(BaseImporter):
             created=datetime.now(),
             updated=datetime.now(),
             description="Inferred model from knowledge graph",
-            prefix="inferred",
+            prefix=self.prefix,
             namespace=DEFAULT_NAMESPACE,
         )
 
@@ -262,12 +331,9 @@ class InferenceImporter(BaseImporter):
 
     @classmethod
     def _read_value_type_occurrence_from_comment(cls, value_type: str, comment: str) -> int:
-        return int(
-            cast(
-                re.Match,
-                re.search(
-                    rf"with value type <{value_type}> which occurs <(\d+)> times in the graph",
-                    comment,
-                ),
-            ).group(1)
-        )
+        if result := re.search(
+            rf"with value type <{value_type}> which occurs <(\d+)> times in the graph",
+            comment,
+        ):
+            return int(result.group(1))
+        return 0
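
The rewritten helper swaps an unchecked `cast(re.Match, ...)` for a walrus-guarded branch, so a comment without the expected annotation now returns 0 instead of raising `AttributeError` on `None`. A standalone sketch of the same logic:

# Standalone re-statement of the refactored helper, for illustration.
import re

def read_occurrence(value_type: str, comment: str) -> int:
    if result := re.search(
        rf"with value type <{value_type}> which occurs <(\d+)> times in the graph",
        comment,
    ):
        return int(result.group(1))
    return 0  # the old version raised AttributeError in this case

print(read_occurrence("string", "with value type <string> which occurs <3> times in the graph"))  # 3
print(read_occurrence("float", "irrelevant comment"))  # 0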