biocypher 0.5.19__py3-none-any.whl → 0.5.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biocypher might be problematic. Click here for more details.

biocypher/_write.py CHANGED
@@ -125,7 +125,6 @@ class _BatchWriter(ABC):
125
125
 
126
126
  def __init__(
127
127
  self,
128
- ontology: "Ontology",
129
128
  translator: "Translator",
130
129
  deduplicator: "Deduplicator",
131
130
  delimiter: str,
@@ -167,10 +166,6 @@ class _BatchWriter(ABC):
167
166
  - _get_import_script_name
168
167
 
169
168
  Args:
170
- ontology:
171
- Instance of :py:class:`Ontology` to enable translation and
172
- ontology queries
173
-
174
169
  translator:
175
170
  Instance of :py:class:`Translator` to enable translation of
176
171
  nodes and manipulation of properties.
@@ -251,8 +246,6 @@ class _BatchWriter(ABC):
251
246
  self.wipe = wipe
252
247
  self.strict_mode = strict_mode
253
248
 
254
- self.extended_schema = ontology.extended_schema
255
- self.ontology = ontology
256
249
  self.translator = translator
257
250
  self.deduplicator = deduplicator
258
251
  self.node_property_dict = {}
@@ -352,34 +345,34 @@ class _BatchWriter(ABC):
352
345
  bool: The return value. True for success, False otherwise.
353
346
  """
354
347
  passed = False
355
- # unwrap generator in one step
356
348
  edges = list(edges) # force evaluation to handle empty generator
357
349
  if edges:
358
- z = zip(
359
- *(
360
- (
361
- e.get_node(),
362
- [
363
- e.get_source_edge(),
364
- e.get_target_edge(),
365
- ],
366
- )
367
- if isinstance(e, BioCypherRelAsNode)
368
- else (None, [e])
369
- for e in edges
370
- )
371
- )
372
- nod, edg = (list(a) for a in z)
373
- nod = [n for n in nod if n]
374
- edg = [val for sublist in edg for val in sublist] # flatten
350
+ nodes_flat = []
351
+ edges_flat = []
352
+ for edge in edges:
353
+ if isinstance(edge, BioCypherRelAsNode):
354
+ # check if relationship has already been written, if so skip
355
+ if self.deduplicator.rel_as_node_seen(edge):
356
+ continue
375
357
 
376
- if nod and edg:
377
- passed = self.write_nodes(nod) and self._write_edge_data(
378
- edg,
358
+ nodes_flat.append(edge.get_node())
359
+ edges_flat.append(edge.get_source_edge())
360
+ edges_flat.append(edge.get_target_edge())
361
+
362
+ else:
363
+ # check if relationship has already been written, if so skip
364
+ if self.deduplicator.edge_seen(edge):
365
+ continue
366
+
367
+ edges_flat.append(edge)
368
+
369
+ if nodes_flat and edges_flat:
370
+ passed = self.write_nodes(nodes_flat) and self._write_edge_data(
371
+ edges_flat,
379
372
  batch_size,
380
373
  )
381
374
  else:
382
- passed = self._write_edge_data(edg, batch_size)
375
+ passed = self._write_edge_data(edges_flat, batch_size)
383
376
 
384
377
  else:
385
378
  # is this a problem? if the generator or list is empty, we
@@ -451,8 +444,12 @@ class _BatchWriter(ABC):
451
444
  bin_l[label] = 1
452
445
 
453
446
  # get properties from config if present
454
- cprops = self.extended_schema.get(label).get(
455
- "properties",
447
+ cprops = (
448
+ self.translator.ontology.mapping.extended_schema.get(
449
+ label
450
+ ).get(
451
+ "properties",
452
+ )
456
453
  )
457
454
  if cprops:
458
455
  d = dict(cprops)
@@ -486,7 +483,7 @@ class _BatchWriter(ABC):
486
483
 
487
484
  # get label hierarchy
488
485
  # multiple labels:
489
- all_labels = self.ontology.get_ancestors(label)
486
+ all_labels = self.translator.ontology.get_ancestors(label)
490
487
 
491
488
  if all_labels:
492
489
  # convert to pascal case
@@ -682,10 +679,6 @@ class _BatchWriter(ABC):
682
679
  # for each label to check for consistency and their type
683
680
  # for now, relevant for `int`
684
681
  for edge in edges:
685
- # check for duplicates
686
- if self.deduplicator.edge_seen(edge):
687
- continue
688
-
689
682
  if not (edge.get_source_id() and edge.get_target_id()):
690
683
  logger.error(
691
684
  "Edge must have source and target node. "
@@ -706,13 +699,23 @@ class _BatchWriter(ABC):
706
699
  # (may not be if it is an edge that carries the
707
700
  # "label_as_edge" property)
708
701
  cprops = None
709
- if label in self.extended_schema:
710
- cprops = self.extended_schema.get(label).get(
702
+ if (
703
+ label
704
+ in self.translator.ontology.mapping.extended_schema
705
+ ):
706
+ cprops = self.translator.ontology.mapping.extended_schema.get(
707
+ label
708
+ ).get(
711
709
  "properties",
712
710
  )
713
711
  else:
714
712
  # try via "label_as_edge"
715
- for k, v in self.extended_schema.items():
713
+ for (
714
+ k,
715
+ v,
716
+ ) in (
717
+ self.translator.ontology.mapping.extended_schema.items()
718
+ ):
716
719
  if isinstance(v, dict):
717
720
  if v.get("label_as_edge") == label:
718
721
  cprops = v.get("properties")
@@ -873,9 +876,14 @@ class _BatchWriter(ABC):
873
876
 
874
877
  if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
875
878
  skip_id = True
876
- elif not self.extended_schema.get(label):
879
+ elif not self.translator.ontology.mapping.extended_schema.get(
880
+ label
881
+ ):
877
882
  # find label in schema by label_as_edge
878
- for k, v in self.extended_schema.items():
883
+ for (
884
+ k,
885
+ v,
886
+ ) in self.translator.ontology.mapping.extended_schema.items():
879
887
  if v.get("label_as_edge") == label:
880
888
  schema_label = k
881
889
  break
@@ -884,7 +892,9 @@ class _BatchWriter(ABC):
884
892
 
885
893
  if schema_label:
886
894
  if (
887
- self.extended_schema.get(schema_label).get("use_id")
895
+ self.translator.ontology.mapping.extended_schema.get(
896
+ schema_label
897
+ ).get("use_id")
888
898
  == False
889
899
  ):
890
900
  skip_id = True
@@ -1009,6 +1019,7 @@ class _Neo4jBatchWriter(_BatchWriter):
1009
1019
 
1010
1020
  This class inherits from the abstract class "_BatchWriter" and implements the
1011
1021
  Neo4j-specific methods:
1022
+
1012
1023
  - _write_node_headers
1013
1024
  - _write_edge_headers
1014
1025
  - _construct_import_call
@@ -1181,9 +1192,14 @@ class _Neo4jBatchWriter(_BatchWriter):
1181
1192
 
1182
1193
  if label in ["IS_SOURCE_OF", "IS_TARGET_OF", "IS_PART_OF"]:
1183
1194
  skip_id = True
1184
- elif not self.extended_schema.get(label):
1195
+ elif not self.translator.ontology.mapping.extended_schema.get(
1196
+ label
1197
+ ):
1185
1198
  # find label in schema by label_as_edge
1186
- for k, v in self.extended_schema.items():
1199
+ for (
1200
+ k,
1201
+ v,
1202
+ ) in self.translator.ontology.mapping.extended_schema.items():
1187
1203
  if v.get("label_as_edge") == label:
1188
1204
  schema_label = k
1189
1205
  break
@@ -1194,7 +1210,9 @@ class _Neo4jBatchWriter(_BatchWriter):
1194
1210
 
1195
1211
  if schema_label:
1196
1212
  if (
1197
- self.extended_schema.get(schema_label).get("use_id")
1213
+ self.translator.ontology.mapping.extended_schema.get(
1214
+ schema_label
1215
+ ).get("use_id")
1198
1216
  == False
1199
1217
  ):
1200
1218
  skip_id = True
@@ -1352,9 +1370,9 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
1352
1370
  f.write(row)
1353
1371
 
1354
1372
  # add collection from schema config
1355
- collection = self.extended_schema[label].get(
1356
- "db_collection_name", None
1357
- )
1373
+ collection = self.translator.ontology.mapping.extended_schema[
1374
+ label
1375
+ ].get("db_collection_name", None)
1358
1376
 
1359
1377
  # add file path to neo4 admin import statement
1360
1378
  # do once for each part file
@@ -1433,16 +1451,19 @@ class _ArangoDBBatchWriter(_Neo4jBatchWriter):
1433
1451
  f.write(row)
1434
1452
 
1435
1453
  # add collection from schema config
1436
- if not self.extended_schema.get(label):
1437
- for _, v in self.extended_schema.items():
1454
+ if not self.translator.ontology.mapping.extended_schema.get(label):
1455
+ for (
1456
+ _,
1457
+ v,
1458
+ ) in self.translator.ontology.mapping.extended_schema.items():
1438
1459
  if v.get("label_as_edge") == label:
1439
1460
  collection = v.get("db_collection_name", None)
1440
1461
  break
1441
1462
 
1442
1463
  else:
1443
- collection = self.extended_schema[label].get(
1444
- "db_collection_name", None
1445
- )
1464
+ collection = self.translator.ontology.mapping.extended_schema[
1465
+ label
1466
+ ].get("db_collection_name", None)
1446
1467
 
1447
1468
  # add file path to neo4 admin import statement (import call path
1448
1469
  # may be different from actual output path)
@@ -1520,6 +1541,7 @@ class _PostgreSQLBatchWriter(_BatchWriter):
1520
1541
 
1521
1542
  This class inherits from the abstract class "_BatchWriter" and implements the
1522
1543
  PostgreSQL-specific methods:
1544
+
1523
1545
  - _write_node_headers
1524
1546
  - _write_edge_headers
1525
1547
  - _construct_import_call
@@ -1839,7 +1861,6 @@ DBMS_TO_CLASS = {
1839
1861
  def get_writer(
1840
1862
  dbms: str,
1841
1863
  translator: "Translator",
1842
- ontology: "Ontology",
1843
1864
  deduplicator: "Deduplicator",
1844
1865
  output_directory: str,
1845
1866
  strict_mode: bool,
@@ -1854,8 +1875,6 @@ def get_writer(
1854
1875
 
1855
1876
  translator: the Translator object.
1856
1877
 
1857
- ontology: the Ontology object.
1858
-
1859
1878
  output_directory: the directory to write the output files to.
1860
1879
 
1861
1880
  strict_mode: whether to use strict mode.
@@ -1879,7 +1898,6 @@ def get_writer(
1879
1898
 
1880
1899
  if writer is not None:
1881
1900
  return writer(
1882
- ontology=ontology,
1883
1901
  translator=translator,
1884
1902
  deduplicator=deduplicator,
1885
1903
  delimiter=dbms_config.get("delimiter"),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biocypher
3
- Version: 0.5.19
3
+ Version: 0.5.21
4
4
  Summary: A unifying framework for biomedical research knowledge graphs
5
5
  Home-page: https://github.com/biocypher/biocypher
6
6
  License: MIT
@@ -25,8 +25,10 @@ Requires-Dist: more_itertools
25
25
  Requires-Dist: neo4j-utils (==0.0.7)
26
26
  Requires-Dist: networkx (>=3.0,<4.0)
27
27
  Requires-Dist: pandas (>=2.0.1,<3.0.0)
28
+ Requires-Dist: pooch (>=1.7.0,<2.0.0)
28
29
  Requires-Dist: rdflib (>=6.2.0,<7.0.0)
29
30
  Requires-Dist: stringcase (>=1.2.0,<2.0.0)
31
+ Requires-Dist: tqdm (>=4.65.0,<5.0.0)
30
32
  Requires-Dist: treelib (>=1.6.1,<2.0.0)
31
33
  Project-URL: Bug Tracker, https://github.com/biocypher/biocypher/issues
32
34
  Project-URL: Repository, https://github.com/biocypher/biocypher
@@ -38,7 +40,9 @@ Description-Content-Type: text/markdown
38
40
  ![Python](https://img.shields.io/badge/python-3.10-blue.svg)
39
41
  [![PyPI version](https://badge.fury.io/py/biocypher.svg)](https://badge.fury.io/py/biocypher)
40
42
  [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
41
- ![Docs build](https://github.com/biocypher/biocypher/actions/workflows/sphinx_autodoc.yml/badge.svg)
43
+ [![CI](https://github.com/biocypher/biocypher/actions/workflows/ci_cd.yaml/badge.svg)](https://github.com/biocypher/biocypher/actions/workflows/ci_cd.yaml)
44
+ ![Coverage](https://raw.githubusercontent.com/biocypher/biocypher/coverage/coverage.svg)
45
+ [![Docs build](https://github.com/biocypher/biocypher/actions/workflows/sphinx_autodoc.yaml/badge.svg)](https://github.com/biocypher/biocypher/actions/workflows/sphinx_autodoc.yaml)
42
46
  [![Downloads](https://static.pepy.tech/badge/biocypher)](https://pepy.tech/project/biocypher)
43
47
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
44
48
  [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](http://makeapullrequest.com)
@@ -5,19 +5,20 @@ biocypher/_config/test_config.yaml,sha256=Np8jeS5_EP6HHOvMKb7B_Tkyqd5YaYlYz_DVsX
5
5
  biocypher/_config/test_schema_config.yaml,sha256=D1600WgEj3iTXrumVU9LIivJHJO36iaxfkOgyam9zVU,3129
6
6
  biocypher/_config/test_schema_config_disconnected.yaml,sha256=Qm8FLxEn2spHcyj_5F859KjcDvKSxNhxDvi4b4LLkvQ,68
7
7
  biocypher/_config/test_schema_config_extended.yaml,sha256=wn3A76142hhjnImhMF6RODbCFESTJ2TtPvcFdIFsAT0,3309
8
- biocypher/_connect.py,sha256=i62424Cbdnm2oI4ECLkcMF2V2A6aShCK2eSSwaGLbVE,12603
9
- biocypher/_core.py,sha256=Sg7ESentsTsqp9KbzPC_jh1fRAqOGzyy98Xzma7BBkw,17100
8
+ biocypher/_connect.py,sha256=0oSyO6CEIlKL8rHo-HHE7y0FzGfSi4vnEXSDy1TnIUE,12456
9
+ biocypher/_core.py,sha256=cc8iOOAhaByobN6zOwdUm1hZFAJ5CpGpKmQnBIIQrbY,21090
10
10
  biocypher/_create.py,sha256=vpUchUdEpWupZi1LgFLxAWMtqoBwnWbP7PwEDUCBS4A,10202
11
- biocypher/_deduplicate.py,sha256=ah2i6ONx6ml4MbQMXIe6NfbVzf1bjav0l3gLj1xGDE0,3288
11
+ biocypher/_deduplicate.py,sha256=BBvfpXzu6L5YDY5FdtXxnf8YlsbJpbCE8RdUoKsm0n0,4949
12
+ biocypher/_get.py,sha256=MHjHEqvPr4Z7Ud05qBcUJkR--iZ1SgUvUoft8MfwUic,8996
12
13
  biocypher/_logger.py,sha256=soYtz1DiduLFw3XrMnphWWUxeuJqvSof4AYhlafxl08,2933
13
14
  biocypher/_mapping.py,sha256=XJZjmXTPnXVkyub1ZU0h3EKXQ2YROaGaJOaGyPMqgy4,9338
14
- biocypher/_metadata.py,sha256=24UdhQ8vslHBfHf0S6oF9A5asyiM9SVOjfVqvRPnFvY,1658
15
+ biocypher/_metadata.py,sha256=CHGBWJ8qYrb7QNQO-Fk0ROkRDXHvtFECSoex9GytJ4A,1658
15
16
  biocypher/_misc.py,sha256=wsjGVOqBDVM5hxbE_TEaZ69u1kJc8HXwRAtQHUgE8XQ,4545
16
- biocypher/_ontology.py,sha256=vCGIHJn_IH5bmOMTA6GJQZB-eNVOlyjYnMzwmwfni0Q,21375
17
- biocypher/_pandas.py,sha256=2qaCtUCk_nhr8dCqXqUr8zgMhCetPh9EDq-3z-8Qxi0,2021
18
- biocypher/_translate.py,sha256=e5XhPxbPArd0aK-Zk7F533ECV12jMR_ZzoAlGD3TAzc,16540
19
- biocypher/_write.py,sha256=kOb_l1LMu_weu5RLxEDLvSrpgdU1PZZe7ObaNhJRkdU,66943
20
- biocypher-0.5.19.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
21
- biocypher-0.5.19.dist-info/WHEEL,sha256=vxFmldFsRN_Hx10GDvsdv1wroKq8r5Lzvjp6GZ4OO8c,88
22
- biocypher-0.5.19.dist-info/METADATA,sha256=_7DNxOzmag2EO1vxTpjE7dcsX7YclymnoIVEkoMXlJ4,9103
23
- biocypher-0.5.19.dist-info/RECORD,,
17
+ biocypher/_ontology.py,sha256=pHc4hO8iZx-yg9gzqfBR9khoIni-lKAxWgnRFyNP91E,21530
18
+ biocypher/_pandas.py,sha256=GVCFM68J7yBjh40MpkNVgD8qT1RFMrrIjMOtD3iKsf4,3040
19
+ biocypher/_translate.py,sha256=nj4Y60F0U3JBH36N2dh5pFcC8Ot86rskJ2ChJwje9dI,16494
20
+ biocypher/_write.py,sha256=2ynF-VkvTr8WT2qPt2wji3iupP3WON94TlT6NpfDvCs,67738
21
+ biocypher-0.5.21.dist-info/LICENSE,sha256=SjUaQkq671iQUZOxEUpC4jvJxXOlfSiHTTueyz9kXJM,1065
22
+ biocypher-0.5.21.dist-info/WHEEL,sha256=vxFmldFsRN_Hx10GDvsdv1wroKq8r5Lzvjp6GZ4OO8c,88
23
+ biocypher-0.5.21.dist-info/METADATA,sha256=wJ1Hnuq_erwEJRMCKA3e7VeUF7cLibnZdcnSCryynx0,9505
24
+ biocypher-0.5.21.dist-info/RECORD,,