biocypher 0.5.36__py3-none-any.whl → 0.5.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic. Click here for more details.

biocypher/_metadata.py CHANGED
@@ -19,7 +19,7 @@ import importlib.metadata
19
19
 
20
20
  import toml
21
21
 
22
- _VERSION = "0.5.36"
22
+ _VERSION = "0.5.37"
23
23
 
24
24
 
25
25
  def get_metadata():
biocypher/_write.py CHANGED
@@ -13,6 +13,7 @@ BioCypher 'offline' module. Handles the writing of node and edge representations
13
13
  suitable for import into a DBMS.
14
14
  """
15
15
 
16
+ import re
16
17
  import glob
17
18
 
18
19
  from ._logger import logger
@@ -22,7 +23,6 @@ logger.debug(f"Loading module {__name__}.")
22
23
  from abc import ABC, abstractmethod
23
24
  from types import GeneratorType
24
25
  from typing import TYPE_CHECKING, Union, Optional
25
- from datetime import datetime
26
26
  from collections import OrderedDict, defaultdict
27
27
  import os
28
28
 
@@ -34,7 +34,6 @@ from ._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
34
34
  __all__ = ["get_writer"]
35
35
 
36
36
  if TYPE_CHECKING:
37
- from ._ontology import Ontology
38
37
  from ._translate import Translator
39
38
  from ._deduplicate import Deduplicator
40
39
 
@@ -954,7 +953,9 @@ class _BatchWriter(ABC):
954
953
  bool: The return value. True for success, False otherwise.
955
954
  """
956
955
  # translate label to PascalCase
957
- label_pascal = self.translator.name_sentence_to_pascal(label)
956
+ label_pascal = self.translator.name_sentence_to_pascal(
957
+ parse_label(label)
958
+ )
958
959
 
959
960
  # list files in self.outdir
960
961
  files = glob.glob(
@@ -1086,7 +1087,9 @@ class _Neo4jBatchWriter(_BatchWriter):
1086
1087
  _id = ":ID"
1087
1088
 
1088
1089
  # translate label to PascalCase
1089
- pascal_label = self.translator.name_sentence_to_pascal(label)
1090
+ pascal_label = self.translator.name_sentence_to_pascal(
1091
+ parse_label(label)
1092
+ )
1090
1093
 
1091
1094
  header = f"{pascal_label}-header.csv"
1092
1095
  header_path = os.path.join(
@@ -1165,7 +1168,9 @@ class _Neo4jBatchWriter(_BatchWriter):
1165
1168
 
1166
1169
  for label, props in self.edge_property_dict.items():
1167
1170
  # translate label to PascalCase
1168
- pascal_label = self.translator.name_sentence_to_pascal(label)
1171
+ pascal_label = self.translator.name_sentence_to_pascal(
1172
+ parse_label(label)
1173
+ )
1169
1174
 
1170
1175
  # paths
1171
1176
  header = f"{pascal_label}-header.csv"
@@ -1310,6 +1315,43 @@ class _Neo4jBatchWriter(_BatchWriter):
1310
1315
  return import_call
1311
1316
 
1312
1317
 
1318
def parse_label(label: str) -> str:
    """
    Strip characters from a label that Neo4j naming rules do not allow.

    See https://neo4j.com/docs/cypher-manual/current/syntax/naming/ for the
    rules. Only ASCII letters, digits, underscore, dollar sign, space and dot
    are kept (the dot is kept for the class hierarchy notation of BioCypher).
    Leading characters are then dropped until the label starts with a letter
    or with ``$``, and surrounding whitespace is stripped.

    Args:
        label (str): The label to check

    Returns:
        str: The compliant label. An empty string is returned if the label
            is empty or consists only of disallowed characters. If no
            remaining character is a letter or ``$``, the remaining
            characters are returned unchanged (apart from stripping).
    """
    # Allowed: alphanumeric characters, underscore, dollar sign, space,
    # and dot (for class hierarchy of BioCypher)
    allowed_chars = r"a-zA-Z0-9_$ ."
    matches = re.findall(f"[{allowed_chars}]", label)
    non_matches = re.findall(f"[^{allowed_chars}]", label)
    if non_matches:
        non_matches = list(set(non_matches))
        logger.warning(
            f"Label is not compliant with Neo4j naming rules. Removed non compliant characters: {non_matches}"
        )

    # Nothing survived the filter (empty label, or only disallowed
    # characters): there is no first character to inspect, so return early
    # instead of raising IndexError on matches[0].
    if not matches:
        return ""

    def first_character_compliant(character: str) -> bool:
        return character.isalpha() or character == "$"

    if not first_character_compliant(matches[0]):
        # index of the first character that may legally start a name
        start = next(
            (
                i
                for i, c in enumerate(matches)
                if first_character_compliant(c)
            ),
            None,
        )
        if start is None:
            # No valid start character exists; keep the characters as they
            # are rather than claiming that something was removed.
            logger.warning(
                "Label does not contain an alphabetic character or $ to start with. Label left unchanged."
            )
        else:
            matches = matches[start:]
            logger.warning(
                "Label does not start with an alphabetic character or with $. Removed non compliant characters."
            )
    return "".join(matches).strip()
1353
+
1354
+
1313
1355
  class _ArangoDBBatchWriter(_Neo4jBatchWriter):
1314
1356
  """
1315
1357
  Class for writing node and edge representations to disk using the format
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biocypher
3
- Version: 0.5.36
3
+ Version: 0.5.37
4
4
  Summary: A unifying framework for biomedical research knowledge graphs
5
5
  Home-page: https://github.com/biocypher/biocypher
6
6
  License: MIT
@@ -12,13 +12,13 @@ biocypher/_deduplicate.py,sha256=BBvfpXzu6L5YDY5FdtXxnf8YlsbJpbCE8RdUoKsm0n0,494
12
12
  biocypher/_get.py,sha256=3Kpky3blfNf1JwxKWLsZxTU2aTP_C4sUe8OpiyYj63I,10810
13
13
  biocypher/_logger.py,sha256=NGXe3hZA79WSujfOgpcxHBf8N2QAfrmvM1LFDpsGK2U,3185
14
14
  biocypher/_mapping.py,sha256=ERSNH2Bg19145KytxbFE4BInPaiP-LWW7osOBot29Eo,9304
15
- biocypher/_metadata.py,sha256=7k16RMYEquR4rs2kSUGc2uJs0T-HntytZ2cZerQtsFk,1658
15
+ biocypher/_metadata.py,sha256=-yWk0B2JDJXt34dPea_5x4t2YASfP8wc6iRVvRKdB1A,1658
16
16
  biocypher/_misc.py,sha256=g5B-PO_XJlYEJC7kEVRdCXeB2NW0ZSVr_5KqTEk2ldk,5877
17
17
  biocypher/_ontology.py,sha256=53hHroH4K9MbwueK2pAbdkidMRBVH2adlQ66QbI_BiE,26734
18
18
  biocypher/_pandas.py,sha256=GVCFM68J7yBjh40MpkNVgD8qT1RFMrrIjMOtD3iKsf4,3040
19
19
  biocypher/_translate.py,sha256=nj4Y60F0U3JBH36N2dh5pFcC8Ot86rskJ2ChJwje9dI,16494
20
- biocypher/_write.py,sha256=nvb75OwElu8fLUp0FjEBqQ1VNpx6iRrk-t7v_TOlDhg,68165
21
- biocypher-0.5.36.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
22
- biocypher-0.5.36.dist-info/METADATA,sha256=0BlPiw51Ym03ecbQ4uBIDCexAU31tbRa1Iqr19GSbiE,10573
23
- biocypher-0.5.36.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
24
- biocypher-0.5.36.dist-info/RECORD,,
20
+ biocypher/_write.py,sha256=EjAnNzayVKvBuIVLw3gY8T9fTnfIPaTGODu275IaRJ8,69554
21
+ biocypher-0.5.37.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
22
+ biocypher-0.5.37.dist-info/METADATA,sha256=iD1g7a9FH1JRfhVynse5qctOAsazZxa0_YDxr-OCDkc,10573
23
+ biocypher-0.5.37.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
24
+ biocypher-0.5.37.dist-info/RECORD,,