ose-core 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ose/__init__.py +9 -0
- ose/commands/CLICommandContext.py +49 -0
- ose/commands/Command.py +29 -0
- ose/commands/CommandContext.py +25 -0
- ose/commands/ImportExternalCommand.py +87 -0
- ose/commands/ReleaseCommandContext.py +60 -0
- ose/commands/__init__.py +0 -0
- ose/constants.py +3 -0
- ose/database/Base.py +5 -0
- ose/database/NextId.py +10 -0
- ose/database/Release.py +44 -0
- ose/database/User.py +18 -0
- ose/database/__init__.py +17 -0
- ose/database/migrations/README +1 -0
- ose/database/migrations/alembic.ini +50 -0
- ose/database/migrations/env.py +113 -0
- ose/database/migrations/script.py.mako +24 -0
- ose/database/migrations/versions/244abfc3e657_rename_tables.py +22 -0
- ose/database/migrations/versions/487271aa555d_added_release_artifacts.py +33 -0
- ose/database/migrations/versions/5a12a34c96d8_added_release_repo.py +37 -0
- ose/database/migrations/versions/bbe766649a99_initial_migration.py +54 -0
- ose/index/BucketStorage.py +42 -0
- ose/index/ExtendedStorage.py +9 -0
- ose/index/FileStorage.py +7 -0
- ose/index/__init__.py +0 -0
- ose/index/create_index.py +90 -0
- ose/index/schema.py +14 -0
- ose/model/ColumnMapping.py +302 -0
- ose/model/Diff.py +77 -0
- ose/model/ExcelOntology.py +1030 -0
- ose/model/Plugin.py +35 -0
- ose/model/Relation.py +91 -0
- ose/model/ReleaseScript.py +94 -0
- ose/model/RepositoryConfiguration.py +34 -0
- ose/model/Result.py +49 -0
- ose/model/Schema.py +91 -0
- ose/model/Script.py +40 -0
- ose/model/ScriptArgument.py +10 -0
- ose/model/Term.py +131 -0
- ose/model/TermIdentifier.py +50 -0
- ose/model/__init__.py +0 -0
- ose/py.typed +0 -0
- ose/release/BuildReleaseStep.py +77 -0
- ose/release/GithubPublishReleaseStep.py +50 -0
- ose/release/HumanVerificationReleaseStep.py +24 -0
- ose/release/ImportExternalReleaseStep.py +116 -0
- ose/release/ImportExternalWithGitHubActionsReleaseStep.py +22 -0
- ose/release/MergeReleaseStep.py +68 -0
- ose/release/PreparationReleaseStep.py +46 -0
- ose/release/ReleaseStep.py +205 -0
- ose/release/ValidationReleaseStep.py +99 -0
- ose/release/__init__.py +0 -0
- ose/release/common.py +96 -0
- ose/release/do_release.py +114 -0
- ose/services/ConfigurationService.py +76 -0
- ose/services/FileCache.py +153 -0
- ose/services/LocalConfigurationService.py +188 -0
- ose/services/OntoloyBuildService.py +42 -0
- ose/services/PluginService.py +118 -0
- ose/services/RepositoryConfigurationService.py +190 -0
- ose/services/RobotOntologyBuildService.py +412 -0
- ose/services/__init__.py +0 -0
- ose/services/validation.py +15 -0
- ose/utils/__init__.py +8 -0
- ose/utils/github.py +176 -0
- ose/utils/strings.py +42 -0
- ose_core-0.2.5.dist-info/METADATA +21 -0
- ose_core-0.2.5.dist-info/RECORD +70 -0
- ose_core-0.2.5.dist-info/WHEEL +5 -0
- ose_core-0.2.5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Initial migration
|
|
2
|
+
|
|
3
|
+
Revision ID: bbe766649a99
|
|
4
|
+
Revises:
|
|
5
|
+
Create Date: 2024-02-29 14:23:07.749071
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
import sqlalchemy as sa
|
|
9
|
+
from alembic import op
|
|
10
|
+
from sqlalchemy.dialects import sqlite
|
|
11
|
+
from sqlalchemy.engine.reflection import Inspector
|
|
12
|
+
|
|
13
|
+
# revision identifiers, used by Alembic.
|
|
14
|
+
revision = 'bbe766649a99'
|
|
15
|
+
down_revision = None
|
|
16
|
+
branch_labels = None
|
|
17
|
+
depends_on = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def downgrade():
    """Undo the initial migration by dropping both tables."""
    for table_name in ('release', 'users'):
        op.drop_table(table_name)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def upgrade():
    """Create the initial ``users`` and ``release`` tables.

    The existence checks make the migration idempotent, so it also runs
    cleanly against databases that already contain these tables.
    """
    conn = op.get_bind()
    # NOTE(review): Inspector.from_engine is deprecated since SQLAlchemy 1.4
    # (sa.inspect(conn) is the replacement) — confirm the installed
    # SQLAlchemy version before modernising.
    inspector = Inspector.from_engine(conn)
    tables = inspector.get_table_names()

    if "users" not in tables:
        op.create_table('users',
                        sa.Column('id', sa.INTEGER(), nullable=False),
                        sa.Column('github_access_token', sa.VARCHAR(length=255), nullable=True),
                        sa.Column('github_id', sa.INTEGER(), nullable=True),
                        sa.Column('github_login', sa.VARCHAR(length=255), nullable=True),
                        sa.PrimaryKeyConstraint('id'))

    if "release" not in tables:
        op.create_table('release',
                        sa.Column('id', sa.INTEGER(), nullable=False),
                        sa.Column('state', sa.VARCHAR(length=20), nullable=True),
                        sa.Column('running', sa.BOOLEAN(), nullable=True),
                        sa.Column('step', sa.INTEGER(), nullable=True),
                        sa.Column('details', sqlite.JSON(), nullable=True),
                        sa.Column('included_files', sqlite.JSON(), nullable=True),
                        sa.Column('start', sa.DATETIME(), nullable=True),
                        sa.Column('started_by', sa.VARCHAR(), nullable=True),
                        sa.Column('end', sa.DATETIME(), nullable=True),
                        sa.Column('release_script', sqlite.JSON(), nullable=True),
                        sa.Column('worker_id', sa.VARCHAR(length=20), nullable=True),
                        sa.Column('local_dir', sa.VARCHAR(), nullable=True),
                        sa.PrimaryKeyConstraint('id'))
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
import whoosh.filedb.filestore
|
|
4
|
+
from .ExtendedStorage import ExtendedStorage
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Implementation of Google Cloud Storage for index
class BucketStorage(ExtendedStorage, whoosh.filedb.filestore.RamStorage):
    """Whoosh storage backend that mirrors an in-memory index to a bucket.

    Index files live in memory (RamStorage); :meth:`open` and :meth:`save`
    synchronize them with the blobs of ``bucket``.
    """

    _logger = logging.getLogger(__name__)

    def __init__(self, bucket):
        super().__init__()
        # assumes a google.cloud.storage-style bucket (blob/list_blobs/
        # delete_blob API) — TODO confirm
        self.bucket = bucket
        # Blob names present in the bucket when it was last opened; used to
        # detect stale blobs on save.
        self.filenameslist = []

    def open(self) -> None:
        """Load all blobs from the bucket into the in-memory storage."""
        self.open_from_bucket()

    def save(self) -> None:
        """Persist the in-memory files to the bucket."""
        self.save_to_bucket()

    def save_to_bucket(self):
        """Upload every in-memory file, then delete stale blobs.

        A blob is stale when it existed at open time but is no longer part
        of the in-memory file set.
        """
        for name in self.files.keys():
            with self.open_file(name) as source:
                self._logger.debug("Saving file %s", name)
                blob = self.bucket.blob(name)
                blob.upload_from_file(source)
        # Iterate over a snapshot: the original removed entries from the very
        # list it was iterating, which silently skips elements.
        for name in list(self.filenameslist):
            if name not in self.files.keys():
                blob = self.bucket.blob(name)
                self._logger.debug("Deleting old file %s", name)
                self.bucket.delete_blob(blob.name)
                self.filenameslist.remove(name)

    def open_from_bucket(self):
        """Download every blob of the bucket into the in-memory storage."""
        self.filenameslist = []
        for blob in self.bucket.list_blobs():
            # The original passed blob.name without a %s placeholder, which
            # makes the logging call fail to format its argument.
            self._logger.debug("Opening blob %s", blob.name)
            self.filenameslist.append(blob.name)
            f = self.create_file(blob.name)
            blob.download_to_file(f)
            f.close()
|
ose/index/FileStorage.py
ADDED
ose/index/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import logging
|
|
3
|
+
from typing import List, Union, Dict
|
|
4
|
+
|
|
5
|
+
import openpyxl
|
|
6
|
+
from whoosh.index import FileIndex
|
|
7
|
+
from whoosh.qparser import MultifieldParser
|
|
8
|
+
from whoosh.writing import SegmentWriter
|
|
9
|
+
|
|
10
|
+
_logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
EntityData = Dict[str, str]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def parse_input_sheet(file: Union[str, bytes, io.BytesIO]) -> List[EntityData]:
    """
    Parses a given Excel file to pairs of headers and data

    The first line of the Excel file is interpreted as header. All following lines are combined with this header and
    returned. An empty sheet yields an empty list.

    :param file: The Excel file. Either a filename or bytes of the Excel file
    :return: Pairs of headers and data
    """
    if isinstance(file, bytes):
        file = io.BytesIO(file)

    wb = openpyxl.load_workbook(file)

    sheet = wb.active
    rows = sheet.rows

    # Guard against completely empty sheets: a bare next() would raise
    # StopIteration here.
    header_row = next(rows, None)
    if header_row is None:
        return []

    header = [cell.value for cell in header_row]

    return [{header[i]: cell.value for i, cell in enumerate(row)} for row in rows]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def re_write_entity_data_set(repo_name: str, index: FileIndex, sheet_name: str, entity_data: List[EntityData]):
    """Replace all indexed documents of one spreadsheet with ``entity_data``.

    Deletes every document matching the repository and spreadsheet, commits,
    then re-adds the given rows in a fresh writer transaction.
    """
    from whoosh.query import And, Term

    writer = index.writer(timeout=60)  # Wait 60s for the writer lock
    # Use a structured query instead of parsing a concatenated string:
    # repo or sheet names containing spaces or query syntax characters would
    # otherwise be mis-parsed by the query parser.
    writer.delete_by_query(And([Term("repo", repo_name), Term("spreadsheet", sheet_name)]))
    writer.commit()
    writer = index.writer(timeout=60)  # Wait 60s for the writer lock
    for data in entity_data:
        add_entity_data_to_index(data, repo_name, sheet_name, writer)

    writer.commit(optimize=True)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def add_entity_data_to_index(entity_data: EntityData, repo_name: str, sheet_name: str, writer: SegmentWriter):
    """Add one spreadsheet row to the search index.

    Rows whose 'Curation status' is "Obsolete" are skipped, as are rows that
    carry neither an ID, label, definition nor parent.
    """
    if 'Curation status' in entity_data and str(entity_data["Curation status"]) == "Obsolete":
        _logger.info(f"Not adding obsolete entity '{entity_data.get('ID')}' to index")
        return

    # dict.get replaces the original repetitive `if key in ...: x = ...[key]
    # else: x = None` chains — identical behavior.
    class_id = entity_data.get("ID")
    label = entity_data.get("Label")
    definition = entity_data.get("Definition")
    parent = entity_data.get("Parent")
    to_be_reviewed_by = entity_data.get("To be reviewed by")

    if class_id or label or definition or parent:
        # Fixed a missing space between the two f-string fragments of the
        # original log message ("...'<repo>'and sheet...").
        _logger.debug(
            f"Adding entity data '{entity_data.get('ID')}' to index for repository '{repo_name}' "
            f"and sheet '{sheet_name}'")

        # `or None` normalises falsy values ("" etc.) to None, matching the
        # original `(x if x else None)` expressions.
        writer.add_document(repo=repo_name,
                            spreadsheet=sheet_name,
                            class_id=class_id or None,
                            label=label or None,
                            definition=definition or None,
                            parent=parent or None,
                            tobereviewedby=to_be_reviewed_by or None)
|
ose/index/schema.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from whoosh.fields import TEXT, KEYWORD, ID, SchemaClass
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class OntologyContentSchema(SchemaClass):
    """Whoosh schema for indexed ontology spreadsheet rows."""
    # Exact-match identifiers (ID fields are not analyzed)
    repo = ID(stored=True)
    spreadsheet = ID(stored=True)
    class_id = ID(stored=True)
    # Full-text searchable fields
    label = TEXT(stored=True)
    definition = TEXT(stored=True)
    parent = KEYWORD(stored=True)
    tobereviewedby = TEXT(stored=True)


# Shared schema instance used when creating/opening the index
schema = OntologyContentSchema()
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import enum
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Optional, Callable, Union, Any, List, Tuple
|
|
6
|
+
|
|
7
|
+
import urllib3.util
|
|
8
|
+
|
|
9
|
+
from .Relation import Relation, OWLPropertyType
|
|
10
|
+
from .TermIdentifier import TermIdentifier
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ColumnMappingKind(enum.Enum):
    """Kinds of content a spreadsheet column can be mapped to.

    NOTE(review): the numeric values look like historical ordering only;
    nothing visible here relies on them — confirm before changing.
    """
    EXCLUDED_IMPORT_ID = 20
    VERSION_IRI = 19
    INVERSE_OF = 18
    IGNORE = 17
    RELATION_TYPE = 16
    PREFIX = 15
    PLAIN = 14
    IMPORTED_ID = 13
    ROOT_ID = 12
    ONTOLOGY_IRI = 11
    RANGE = 10
    DOMAIN = 9
    ONTOLOGY_ID = 8
    SUB_PROPERTY_OF = 7
    SYNONYMS = 6
    RELATION = 5
    DISJOINT_WITH = 4
    EQUIVALENT_TO = 3
    SUB_CLASS_OF = 2
    LABEL = 1
    ID = 0
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ColumnMapping(abc.ABC):
    """Strategy describing how one spreadsheet column is interpreted.

    Subclasses define the column's kind, how raw cell values are converted
    (:meth:`get_value`) and how they are validated (:meth:`valid`).
    """

    @abc.abstractmethod
    def get_name(self) -> str:
        """Return the name of the column this mapping covers."""
        pass

    @abc.abstractmethod
    def get_kind(self) -> ColumnMappingKind:
        """Return the kind of content the column holds."""
        pass

    def get_value(self, value: str) -> Any:
        """Convert a raw cell value; by default just strips whitespace."""
        return value.strip()

    def valid(self, value: str) -> bool:
        """Validate a raw cell value; by default everything is accepted."""
        return True
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class SimpleColumnMapping(ColumnMapping):
    """Basic column mapping: a kind plus the column name it covers."""
    kind: ColumnMappingKind
    name: str

    def get_name(self) -> str:
        """Return the mapped column name."""
        return self.name

    def get_kind(self) -> ColumnMappingKind:
        """Return the mapping kind."""
        return self.kind
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class ChoiceColumnMapping(SimpleColumnMapping):
    """Column mapping whose values must come from a fixed set of choices."""
    # Allowed cell values (compared after stripping the cell)
    choices: List[str]

    def valid(self, value: str) -> bool:
        """True when the stripped value is one of the allowed choices."""
        return value is not None and (value.strip() in self.choices)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class PrefixColumnMapping(SimpleColumnMapping):
    """Maps a column of quoted prefix declarations like ``"ex: http://example.org/"``."""

    # One quoted `prefix: expansion` entry
    _pattern = re.compile(r'"([\w\d]+):(.*)"')

    def __init__(self, name: str, separator: Optional[str] = None):
        self.name = name
        self.separator = separator
        self.kind = ColumnMappingKind.PREFIX

    def get_value(self, value: str) -> List[Tuple[str, str]]:
        """Parse the cell into ``(prefix, expansion)`` pairs."""
        values = [value]
        if self.separator is not None:
            values = value.split(self.separator)

        res: List[Tuple[str, str]] = []
        for v in values:
            m = self._pattern.match(v.strip())
            res.append((m.group(1).strip(), m.group(2).strip()))

        return res

    def valid(self, value: str) -> bool:
        """Check that every entry matches the quoted prefix pattern.

        Each entry is stripped before matching, exactly as ``get_value``
        does; the original matched the unstripped entry and therefore
        rejected separated values such as ``"a: x", "b: y"`` whose second
        entry starts with a space.
        """
        values = [value]
        if self.separator is not None:
            values = value.split(self.separator)

        return all(self._pattern.match(v.strip()) is not None for v in values)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@dataclass
class ParentMapping(SimpleColumnMapping):
    """Maps a parent column of the form ``label``, ``label [ID:123]`` or ``label (ID:123)``."""

    # Non-greedy label so the optional `(ID:nnn)` suffix can match; with the
    # original greedy `[^\[]+` the parenthesised alternative was unreachable
    # because the label consumed the whole value.
    _pattern = re.compile(r"^([^\[]+?)(?:\[(\w+:\d+)\]|\((\w+:\d+)\))?$")

    def get_value(self, value: str) -> TermIdentifier:
        """Split the cell into a TermIdentifier with optional id and label."""
        match = self._pattern.match(value.strip())
        # The id may come from either the bracketed form (group 2) or the
        # parenthesised form (group 3); the original read only group 2.
        id = match.group(2) or match.group(3)
        label = match.group(1)
        return TermIdentifier(id=id.strip() if id is not None else None,
                              label=label.strip() if label is not None else None)

    def valid(self, value: str) -> bool:
        """True when the stripped value matches the parent pattern."""
        return self._pattern.match(value.strip()) is not None
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dataclass
class ManchesterSyntaxMapping(SimpleColumnMapping):
    """Marker mapping for columns holding Manchester Syntax expressions.

    Values pass through unchanged (inherited ``get_value`` only strips);
    the distinct type lets consumers recognise the column's content.
    """
    pass
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@dataclass
class IRIMapping(SimpleColumnMapping):
    """Maps a column expected to contain an IRI (optionally a CURIE)."""
    # When True, a value containing ':' is also accepted as a CURIE.
    allow_curie: bool = False

    def valid(self, value: str) -> bool:
        """Accept anything urllib3 can parse as a URL, or a CURIE if allowed."""
        candidate = value.strip()
        if urllib3.util.parse_url(candidate) is not None:
            return True
        return self.allow_curie and ":" in candidate
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@dataclass
class TermMapping(SimpleColumnMapping):
    """Maps a column of term references like ``label [ID]``, optionally separator-delimited."""
    require_id: Optional[bool] = False      # each entry must carry a non-empty [ID] part
    require_label: Optional[bool] = False   # each entry must carry a non-empty label
    separator: Optional[str] = None         # split multi-valued cells on this

    # label (possibly empty) optionally followed by `[id]`
    _term_pattern = re.compile(r"^([^\[]*)\s*(?:\[(.*)\])?$")

    def get_value(self, value: str) -> List[TermIdentifier]:
        """Parse the cell into term identifiers, skipping empty entries."""
        idents = []
        for val in [value.strip()] if self.separator is None else value.strip().split(self.separator):
            if val.strip() == "":
                continue

            m = self._term_pattern.match(val.strip())
            label = m.group(1)
            id = m.group(2)
            idents.append(TermIdentifier(
                id=None if id is None else id.strip(),
                label=None if label is None else label.strip()))

        return idents

    def valid(self, value: str) -> bool:
        """Check each entry parses and satisfies require_id/require_label."""
        if self.separator is None:
            values = [value]
        else:
            values = value.strip().split(self.separator)

        values = [v.strip() for v in values if v.strip() != ""]

        for v in values:
            m = self._term_pattern.match(v.strip())
            # The label group uses `*`, so it is never None, only possibly
            # empty — the original `is None` test made require_label
            # ineffective (and `[]` satisfied require_id). Test emptiness.
            if m is None or not m.group(1).strip() and self.require_label \
                    or not (m.group(2) or "").strip() and self.require_id:
                return False

        return True
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@dataclass
class LabelMapping(SimpleColumnMapping):
    """Maps the label column; synonyms may follow in parentheses: ``label (syn1; syn2)``."""
    kind = ColumnMappingKind.LABEL

    # Non-greedy label, anchored at the end: the original greedy `(.*)`
    # always consumed the whole value, so the synonym group never matched
    # and get_synonyms crashed on `None.split(";")`.
    _label_pattern = re.compile(r"(.*?)\s*(?:\((.*)\))?$")

    def __init__(self, name: str):
        self.name = name

    def get_kind(self) -> ColumnMappingKind:
        """Label mappings always report the LABEL kind."""
        return ColumnMappingKind.LABEL

    def get_value(self, value: str) -> str:
        """The canonical value of a label cell is the bare label."""
        return self.get_label(value)

    def get_label(self, value: str) -> str:
        """Return the label part with any parenthesised synonym suffix removed."""
        match = re.match(self._label_pattern, value)
        if match is None:
            return value.strip()
        else:
            return match.group(1).strip()

    def get_synonyms(self, value: str) -> List[str]:
        """Return the ';'-separated synonyms from the parenthesised suffix, if any."""
        match = re.match(self._label_pattern, value)
        if match is None or match.group(2) is None:
            # No parenthesised suffix present (the original crashed here
            # because the synonym group could never match).
            return []
        return [s.strip() for s in match.group(2).split(";")]

    def valid(self, value: str) -> bool:
        """The pattern accepts any value; kept for interface symmetry."""
        return self._label_pattern.match(value) is not None
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
@dataclass
class RelationColumnMapping(ColumnMapping):
    """Maps a column whose cells are values of one specific relation."""
    relation: Relation
    name: str
    separator: Optional[str] = None  # split multi-valued cells on this

    def get_name(self) -> str:
        """Return the mapped column name."""
        return self.name

    def get_kind(self) -> ColumnMappingKind:
        """All relation mappings report the RELATION kind."""
        return ColumnMappingKind.RELATION

    def get_relation(self) -> Relation:
        """Return the relation the column's values belong to."""
        return self.relation

    def get_value(self, value: str) -> List[Tuple[TermIdentifier, Any]]:
        """Split the cell (if a separator is set) and pair each entry with the relation id."""
        text = str(value)
        if self.separator is None:
            entries = [text.strip()]
        else:
            entries = [part.strip() for part in text.split(self.separator)]

        if self.relation.owl_property_type == OWLPropertyType.ObjectProperty:
            # Object-property targets are other terms, referenced by label.
            entries = [TermIdentifier(label=part) for part in entries]

        return [(self.relation.identifier(), entry) for entry in entries]
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class ColumnMappingFactory(abc.ABC):
|
|
229
|
+
@abc.abstractmethod
|
|
230
|
+
def maps(self, column_name: str) -> bool:
|
|
231
|
+
pass
|
|
232
|
+
|
|
233
|
+
@abc.abstractmethod
|
|
234
|
+
def create_mapping(self, origin: str, column_name: str) -> ColumnMapping:
|
|
235
|
+
pass
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
@dataclass
|
|
239
|
+
class SingletonMappingFactory(ColumnMappingFactory):
|
|
240
|
+
column_names: List[str]
|
|
241
|
+
mapping: ColumnMapping
|
|
242
|
+
|
|
243
|
+
def maps(self, column_name: str) -> bool:
|
|
244
|
+
return column_name in self.column_names
|
|
245
|
+
|
|
246
|
+
def create_mapping(self, origin: str, column_name: str) -> ColumnMapping:
|
|
247
|
+
return self.mapping
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
@dataclass
|
|
251
|
+
class PatternMappingFactory(ColumnMappingFactory):
|
|
252
|
+
pattern: re.Pattern
|
|
253
|
+
mapping_factory: Callable[[str, str, re.Match], ColumnMapping]
|
|
254
|
+
|
|
255
|
+
def maps(self, column_name: str) -> bool:
|
|
256
|
+
return re.match(self.pattern, column_name) is not None
|
|
257
|
+
|
|
258
|
+
def create_mapping(self, origin: str, column_name: str) -> ColumnMapping:
|
|
259
|
+
match = re.match(self.pattern, column_name)
|
|
260
|
+
return self.mapping_factory(origin, column_name, match)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def singleton(excel_names: List[str], mapping: Callable[..., ColumnMapping], *args, **kwargs) -> ColumnMappingFactory:
|
|
264
|
+
return SingletonMappingFactory(excel_names, mapping(*args, **{"name": excel_names[0], **kwargs}))
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def simple(excel_names: List[str], kind: ColumnMappingKind, name: Optional[str] = None) -> ColumnMappingFactory:
|
|
268
|
+
return SingletonMappingFactory(excel_names, SimpleColumnMapping(kind, excel_names[0] if name is None else name))
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def relation(excel_name: List[str], relation: TermIdentifier, name: Optional[str] = None,
|
|
272
|
+
separator: Optional[str] = None,
|
|
273
|
+
property_type: OWLPropertyType = OWLPropertyType.AnnotationProperty) -> ColumnMappingFactory:
|
|
274
|
+
return SingletonMappingFactory(excel_name, RelationColumnMapping(
|
|
275
|
+
Relation(relation.id, relation.label, [], [], property_type, [], None, None, [], ("<schema>", 0)),
|
|
276
|
+
excel_name[0] if name is None else name, separator))
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def internal(excel_names: List[str], name: str, split: Optional[str] = None) -> ColumnMappingFactory:
|
|
280
|
+
return relation(excel_names, TermIdentifier(id=None, label=name), name, split, OWLPropertyType.Internal)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def ignore(excel_name: str) -> ColumnMappingFactory:
|
|
284
|
+
def _ignore(*_args, **_kwargs):
|
|
285
|
+
return SimpleColumnMapping(ColumnMappingKind.IGNORE, excel_name)
|
|
286
|
+
|
|
287
|
+
return singleton([excel_name], _ignore)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def relation_pattern(pattern: Union[str, re.Pattern],
|
|
291
|
+
factory: Callable[[str, re.Match], TermIdentifier],
|
|
292
|
+
separator: Optional[str] = None,
|
|
293
|
+
relation_kind: OWLPropertyType = OWLPropertyType.AnnotationProperty) -> ColumnMappingFactory:
|
|
294
|
+
def f(origin: str, rel_name: str, match: re.Match) -> RelationColumnMapping:
|
|
295
|
+
identifier = factory(rel_name, match)
|
|
296
|
+
return RelationColumnMapping(
|
|
297
|
+
Relation(identifier.id, identifier.label, [], [], relation_kind, [], None, None, [], (origin, 0)),
|
|
298
|
+
f"REL {rel_name}",
|
|
299
|
+
separator
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
return PatternMappingFactory(pattern, f)
|
ose/model/Diff.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import typing
|
|
3
|
+
|
|
4
|
+
from typing import Generic, List
|
|
5
|
+
|
|
6
|
+
A = typing.TypeVar("A")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclasses.dataclass
|
|
10
|
+
class Diff(Generic[A]):
|
|
11
|
+
field: str
|
|
12
|
+
a: A
|
|
13
|
+
b: A
|
|
14
|
+
change_type: str = "update"
|
|
15
|
+
|
|
16
|
+
@property
|
|
17
|
+
def old(self) -> A:
|
|
18
|
+
return self.a
|
|
19
|
+
|
|
20
|
+
@old.setter
|
|
21
|
+
def old(self, value: A):
|
|
22
|
+
self.a = value
|
|
23
|
+
|
|
24
|
+
@property
|
|
25
|
+
def new(self) -> A:
|
|
26
|
+
return self.b
|
|
27
|
+
|
|
28
|
+
@new.setter
|
|
29
|
+
def new(self, value: A):
|
|
30
|
+
self.b = value
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
SDiff = Diff[str]
|
|
34
|
+
|
|
35
|
+
PRIMITIVE_TYPES = (bool, str, int, float, type(None))
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def diff(a, b, __path="") -> List[Diff]:
|
|
39
|
+
if not (dataclasses.is_dataclass(a) or isinstance(a, dict)) or not \
|
|
40
|
+
(dataclasses.is_dataclass(b) or isinstance(b, dict)):
|
|
41
|
+
raise ValueError(f"Values must be dataclasses or dictionaries. a: '{type(a)}', b: '{type(b)}'")
|
|
42
|
+
|
|
43
|
+
if type(a) is not type(b):
|
|
44
|
+
raise ValueError(f"Values must be of same type but '{type(a)}' != '{type(b)}'")
|
|
45
|
+
|
|
46
|
+
results = []
|
|
47
|
+
|
|
48
|
+
if dataclasses.is_dataclass(a):
|
|
49
|
+
fields = [f.name for f in dataclasses.fields(a)]
|
|
50
|
+
|
|
51
|
+
a = dataclasses.asdict(a)
|
|
52
|
+
b = dataclasses.asdict(b)
|
|
53
|
+
else:
|
|
54
|
+
fields = a.keys()
|
|
55
|
+
|
|
56
|
+
for f in fields:
|
|
57
|
+
val_a = a[f]
|
|
58
|
+
val_b = b[f]
|
|
59
|
+
|
|
60
|
+
path = __path + ("." if __path else "") + f
|
|
61
|
+
|
|
62
|
+
if isinstance(val_a, PRIMITIVE_TYPES):
|
|
63
|
+
if val_a != val_b:
|
|
64
|
+
results.append(Diff(path, val_a, val_b))
|
|
65
|
+
elif isinstance(val_a, (list, tuple, set)):
|
|
66
|
+
adds = [v for v in val_b if v not in val_a]
|
|
67
|
+
removes = [v for v in val_a if v not in val_b]
|
|
68
|
+
|
|
69
|
+
for v in adds:
|
|
70
|
+
results.append(Diff(path, None, v, "add"))
|
|
71
|
+
|
|
72
|
+
for v in removes:
|
|
73
|
+
results.append(Diff(path, v, None, "remove"))
|
|
74
|
+
else:
|
|
75
|
+
results.extend(diff(val_a, val_b, path))
|
|
76
|
+
|
|
77
|
+
return results
|