biocypher 0.5.36__tar.gz → 0.5.37__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biocypher might be problematic. Click here for more details.
- {biocypher-0.5.36 → biocypher-0.5.37}/PKG-INFO +1 -1
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_metadata.py +1 -1
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_write.py +47 -5
- {biocypher-0.5.36 → biocypher-0.5.37}/pyproject.toml +1 -1
- {biocypher-0.5.36 → biocypher-0.5.37}/LICENSE +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/README.md +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/__init__.py +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_config/__init__.py +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_config/biocypher_config.yaml +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_config/test_config.yaml +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_config/test_schema_config.yaml +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_config/test_schema_config_disconnected.yaml +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_config/test_schema_config_extended.yaml +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_connect.py +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_core.py +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_create.py +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_deduplicate.py +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_get.py +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_logger.py +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_mapping.py +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_misc.py +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_ontology.py +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_pandas.py +0 -0
- {biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_translate.py +0 -0
|
@@ -13,6 +13,7 @@ BioCypher 'offline' module. Handles the writing of node and edge representations
|
|
|
13
13
|
suitable for import into a DBMS.
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
|
+
import re
|
|
16
17
|
import glob
|
|
17
18
|
|
|
18
19
|
from ._logger import logger
|
|
@@ -22,7 +23,6 @@ logger.debug(f"Loading module {__name__}.")
|
|
|
22
23
|
from abc import ABC, abstractmethod
|
|
23
24
|
from types import GeneratorType
|
|
24
25
|
from typing import TYPE_CHECKING, Union, Optional
|
|
25
|
-
from datetime import datetime
|
|
26
26
|
from collections import OrderedDict, defaultdict
|
|
27
27
|
import os
|
|
28
28
|
|
|
@@ -34,7 +34,6 @@ from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
|
|
|
34
34
|
__all__ = ["get_writer"]
|
|
35
35
|
|
|
36
36
|
if TYPE_CHECKING:
|
|
37
|
-
from ._ontology import Ontology
|
|
38
37
|
from ._translate import Translator
|
|
39
38
|
from ._deduplicate import Deduplicator
|
|
40
39
|
|
|
@@ -954,7 +953,9 @@ class _BatchWriter(ABC):
|
|
|
954
953
|
bool: The return value. True for success, False otherwise.
|
|
955
954
|
"""
|
|
956
955
|
# translate label to PascalCase
|
|
957
|
-
label_pascal = self.translator.name_sentence_to_pascal(
|
|
956
|
+
label_pascal = self.translator.name_sentence_to_pascal(
|
|
957
|
+
parse_label(label)
|
|
958
|
+
)
|
|
958
959
|
|
|
959
960
|
# list files in self.outdir
|
|
960
961
|
files = glob.glob(
|
|
@@ -1086,7 +1087,9 @@ class _Neo4jBatchWriter(_BatchWriter):
|
|
|
1086
1087
|
_id = ":ID"
|
|
1087
1088
|
|
|
1088
1089
|
# translate label to PascalCase
|
|
1089
|
-
pascal_label = self.translator.name_sentence_to_pascal(
|
|
1090
|
+
pascal_label = self.translator.name_sentence_to_pascal(
|
|
1091
|
+
parse_label(label)
|
|
1092
|
+
)
|
|
1090
1093
|
|
|
1091
1094
|
header = f"{pascal_label}-header.csv"
|
|
1092
1095
|
header_path = os.path.join(
|
|
@@ -1165,7 +1168,9 @@ class _Neo4jBatchWriter(_BatchWriter):
|
|
|
1165
1168
|
|
|
1166
1169
|
for label, props in self.edge_property_dict.items():
|
|
1167
1170
|
# translate label to PascalCase
|
|
1168
|
-
pascal_label = self.translator.name_sentence_to_pascal(
|
|
1171
|
+
pascal_label = self.translator.name_sentence_to_pascal(
|
|
1172
|
+
parse_label(label)
|
|
1173
|
+
)
|
|
1169
1174
|
|
|
1170
1175
|
# paths
|
|
1171
1176
|
header = f"{pascal_label}-header.csv"
|
|
@@ -1310,6 +1315,43 @@ class _Neo4jBatchWriter(_BatchWriter):
|
|
|
1310
1315
|
return import_call
|
|
1311
1316
|
|
|
1312
1317
|
|
|
1318
|
+
def parse_label(label: str) -> str:
    """

    Check if the label is compliant with Neo4j naming conventions,
    https://neo4j.com/docs/cypher-manual/current/syntax/naming/, and if not,
    remove non-compliant characters.

    Characters other than ASCII letters, digits, underscore, dollar sign,
    space and dot are dropped. If the remaining text does not start with a
    letter or ``$``, everything before the first such character is dropped
    as well. Leading and trailing whitespace is stripped from the result.

    Args:
        label (str): The label to check

    Returns:
        str: The compliant label. An empty string is returned if the label
            contains no allowed characters at all. If the label contains
            neither a letter nor ``$``, it is returned with only the
            disallowed characters removed.
    """
    # Check if the name contains only alphanumeric characters, underscore, or
    # dollar sign, and space and dot (for class hierarchy of BioCypher)
    allowed_chars = r"a-zA-Z0-9_$ ."
    matches = re.findall(f"[{allowed_chars}]", label)
    non_matches = re.findall(f"[^{allowed_chars}]", label)
    if non_matches:
        # sorted() keeps the reported characters in a deterministic order
        non_matches = sorted(set(non_matches))
        logger.warning(
            f"Label is not compliant with Neo4j naming rules. Removed non compliant characters: {non_matches}"
        )

    # Empty label, or nothing but disallowed characters: there is no first
    # character to inspect, and indexing ``matches[0]`` would raise IndexError.
    if not matches:
        return ""

    def first_character_compliant(character: str) -> bool:
        return character.isalpha() or character == "$"

    if not first_character_compliant(matches[0]):
        # Index of the first character that may legally start a name, if any
        start = next(
            (
                index
                for index, character in enumerate(matches)
                if first_character_compliant(character)
            ),
            None,
        )
        if start is None:
            # Nothing can be trimmed to obtain a valid start; keep the
            # characters as they are and say so instead of claiming removal.
            logger.warning(
                "Label does not contain an alphabetic character or $ to start with. No leading characters were removed."
            )
        else:
            matches = matches[start:]
            logger.warning(
                "Label does not start with an alphabetic character or with $. Removed non compliant characters."
            )
    return "".join(matches).strip()
|
|
1353
|
+
|
|
1354
|
+
|
|
1313
1355
|
class _ArangoDBBatchWriter(_Neo4jBatchWriter):
|
|
1314
1356
|
"""
|
|
1315
1357
|
Class for writing node and edge representations to disk using the format
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{biocypher-0.5.36 → biocypher-0.5.37}/biocypher/_config/test_schema_config_disconnected.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|