rdworks 0.25.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. rdworks/__init__.py +35 -0
  2. rdworks/autograph/__init__.py +4 -0
  3. rdworks/autograph/autograph.py +184 -0
  4. rdworks/autograph/centroid.py +90 -0
  5. rdworks/autograph/dynamictreecut.py +135 -0
  6. rdworks/autograph/nmrclust.py +123 -0
  7. rdworks/autograph/rckmeans.py +74 -0
  8. rdworks/bitqt/__init__.py +1 -0
  9. rdworks/bitqt/bitqt.py +355 -0
  10. rdworks/conf.py +374 -0
  11. rdworks/descriptor.py +36 -0
  12. rdworks/display.py +206 -0
  13. rdworks/ionized.py +170 -0
  14. rdworks/matchedseries.py +260 -0
  15. rdworks/mol.py +1522 -0
  16. rdworks/mollibr.py +887 -0
  17. rdworks/pka.py +38 -0
  18. rdworks/predefined/Asinex_fragment.xml +20 -0
  19. rdworks/predefined/Astex_RO3.xml +16 -0
  20. rdworks/predefined/Baell2010_PAINS/Baell2010A.xml +52 -0
  21. rdworks/predefined/Baell2010_PAINS/Baell2010B.xml +169 -0
  22. rdworks/predefined/Baell2010_PAINS/Baell2010C.xml +1231 -0
  23. rdworks/predefined/Baell2010_PAINS/PAINS-less-than-015-hits.xml +2048 -0
  24. rdworks/predefined/Baell2010_PAINS/PAINS-less-than-150-hits.xml +278 -0
  25. rdworks/predefined/Baell2010_PAINS/PAINS-more-than-150-hits.xml +83 -0
  26. rdworks/predefined/Baell2010_PAINS/makexml.py +70 -0
  27. rdworks/predefined/Brenk2008_Dundee/makexml.py +21 -0
  28. rdworks/predefined/CNS.xml +18 -0
  29. rdworks/predefined/ChEMBL_Walters/BMS.xml +543 -0
  30. rdworks/predefined/ChEMBL_Walters/Dundee.xml +318 -0
  31. rdworks/predefined/ChEMBL_Walters/Glaxo.xml +168 -0
  32. rdworks/predefined/ChEMBL_Walters/Inpharmatica.xml +276 -0
  33. rdworks/predefined/ChEMBL_Walters/LINT.xml +174 -0
  34. rdworks/predefined/ChEMBL_Walters/MLSMR.xml +351 -0
  35. rdworks/predefined/ChEMBL_Walters/PAINS.xml +1446 -0
  36. rdworks/predefined/ChEMBL_Walters/SureChEMBL.xml +501 -0
  37. rdworks/predefined/ChEMBL_Walters/makexml.py +40 -0
  38. rdworks/predefined/Hann1999_Glaxo/Hann1999.xml +168 -0
  39. rdworks/predefined/Hann1999_Glaxo/Hann1999Acid.xml +102 -0
  40. rdworks/predefined/Hann1999_Glaxo/Hann1999Base.xml +6 -0
  41. rdworks/predefined/Hann1999_Glaxo/Hann1999ElPh.xml +6 -0
  42. rdworks/predefined/Hann1999_Glaxo/Hann1999NuPh.xml +6 -0
  43. rdworks/predefined/Hann1999_Glaxo/makexml.py +83 -0
  44. rdworks/predefined/Kazius2005/Kazius2005.xml +114 -0
  45. rdworks/predefined/Kazius2005/makexml.py +66 -0
  46. rdworks/predefined/ZINC_druglike.xml +24 -0
  47. rdworks/predefined/ZINC_fragment.xml +14 -0
  48. rdworks/predefined/ZINC_leadlike.xml +15 -0
  49. rdworks/predefined/fragment.xml +7 -0
  50. rdworks/predefined/ionized/simple_smarts_pattern.csv +57 -0
  51. rdworks/predefined/ionized/smarts_pattern.csv +107 -0
  52. rdworks/predefined/misc/makexml.py +119 -0
  53. rdworks/predefined/misc/reactive-part-2.xml +104 -0
  54. rdworks/predefined/misc/reactive-part-3.xml +74 -0
  55. rdworks/predefined/misc/reactive.xml +321 -0
  56. rdworks/readin.py +312 -0
  57. rdworks/rgroup.py +2173 -0
  58. rdworks/scaffold.py +520 -0
  59. rdworks/std.py +143 -0
  60. rdworks/stereoisomers.py +127 -0
  61. rdworks/tautomers.py +20 -0
  62. rdworks/units.py +63 -0
  63. rdworks/utils.py +495 -0
  64. rdworks/xml.py +260 -0
  65. rdworks-0.25.7.dist-info/METADATA +37 -0
  66. rdworks-0.25.7.dist-info/RECORD +69 -0
  67. rdworks-0.25.7.dist-info/WHEEL +5 -0
  68. rdworks-0.25.7.dist-info/licenses/LICENSE +21 -0
  69. rdworks-0.25.7.dist-info/top_level.txt +1 -0
rdworks/utils.py ADDED
@@ -0,0 +1,495 @@
1
+ import numpy as np
2
+ import math
3
+ import networkx as nx
4
+ import gzip
5
+ import operator
6
+ import re
7
+ import shlex
8
+
9
+ from rdkit import Chem
10
+ from pathlib import Path
11
+ from typing import Any, Callable
12
+ from functools import reduce
13
+ from concurrent.futures import ProcessPoolExecutor
14
+ from tqdm import tqdm
15
+
16
+ from rdworks.autograph.centroid import centroid_medoid
17
+
18
+
19
def compute(fn:Callable, largs: list, **kwargs) -> list:
    """Applies a function to every item of an argument list, optionally in parallel.

    NOTE: the two execution paths call `fn` differently. With `max_workers > 1`
    each item of `largs` is handed to `fn` as ONE positional argument
    (`fn(larg)`), whereas the serial path unpacks the item (`fn(*larg)`).

    Args:
        fn (Callable): function to call. For the multi-process path it has to be
            picklable because it is dispatched through a ProcessPoolExecutor.
        largs (list): list of per-call arguments.
        **kwargs: optional settings:
            max_workers (int): number of worker processes. Defaults to 1 (serial).
            chunksize (int): chunk size passed to `executor.map`. Defaults to 10.
            progress (bool): whether to show a tqdm progress bar. Defaults to False.
            desc (str): label of the progress bar. Defaults to 'Progress'.

    Returns:
        list: results in the same order as `largs`.
    """
    workers = kwargs.get('max_workers', 1)
    chunk = kwargs.get('chunksize', 10)
    show_progress = kwargs.get('progress', False)
    label = kwargs.get('desc', 'Progress')
    total = len(largs)

    if workers > 1:
        with ProcessPoolExecutor(max_workers=workers) as pool:
            outputs = pool.map(fn, largs, chunksize=chunk)
            if show_progress:
                outputs = tqdm(outputs, desc=label, total=total)
            # consume the lazy iterator before the pool is shut down
            return list(outputs)

    items = tqdm(largs, desc=label, total=total) if show_progress else largs
    return [fn(*args) for args in items]
37
+
38
+
39
+
40
+ def precheck_path(path:str | Path) -> Path:
41
+ """Prechecks filename or path and returns a string for the pathlib.PosixPath.
42
+
43
+ Args:
44
+ path (Union[str, PosixPath]): filename or path.
45
+ origin (str): origin of data.
46
+
47
+ Raises:
48
+ FileNotFoundError: if the path is not found.
49
+
50
+ Returns:
51
+ str: a string for the path.
52
+ """
53
+ if isinstance(path, Path):
54
+ pass
55
+ elif isinstance(path, str):
56
+ path = Path(path)
57
+ if path.exists() and path.is_file():
58
+ return path
59
+ else:
60
+ raise FileNotFoundError(f"File path {path.as_posix()} does not exist.")
61
+
62
+
63
+
64
+ def guess_mol_id(lprops:list[dict]) -> tuple[str, int, int]:
65
+ """Guesses molecular ID from SDF properties.
66
+
67
+ Molecular ID is guessed by the coverage(=count of unique values divided by total count).
68
+ A property suitable for ID should have coverage of 1.0.
69
+
70
+ Args:
71
+ lprops (List[dict]): a list of properties.
72
+
73
+ Returns:
74
+ Tuple[str, int, int]: (property, count of unique values, total count)
75
+ """
76
+ f = {} # unique occurrence
77
+ for props in lprops:
78
+ for k in props:
79
+ v = props[k]
80
+ # float is not suitable for molecular id
81
+ if isinstance(v, float):
82
+ continue
83
+ if not (k in f):
84
+ f[k] = set()
85
+ # str(int) is acceptable for molecular id
86
+ if isinstance(v, int):
87
+ f[k].add(str(v))
88
+ elif isinstance(v, str):
89
+ f[k].add(v)
90
+ r = [(k, len(f[k]), -max([len(x) for x in f[k]])) for k in f]
91
+ r = sorted(r, key=operator.itemgetter(1,2))
92
+ try:
93
+ (property_key, count, total) = (r[-1][0], r[-1][1], len(lprops))
94
+ except IndexError:
95
+ (property_key, count, total) = (None, 0, 0)
96
+ return (property_key, count, total)
97
+
98
+
99
+
100
def fix_decimal_places_in_list(in_list:list, decimal_places:int=2) -> list:
    """Rounds every float found in a (possibly nested) list.

    Nested dictionaries, lists and tuples are processed recursively. Tuples
    (including a tuple passed as `in_list`) come back as lists. Any other
    item is copied unchanged.

    Args:
        in_list (list): The list to fix.
        decimal_places (int): The number of decimal places to round float values to.

    Returns:
        list: a new list with the float values rounded to the specified number of decimal places.
    """
    def _fix(item):
        if isinstance(item, float):
            return round(item, decimal_places)
        if isinstance(item, dict):
            return fix_decimal_places_in_dict(item, decimal_places)
        if isinstance(item, (list, tuple)):
            return fix_decimal_places_in_list(item, decimal_places)
        return item

    return [_fix(item) for item in in_list]
122
+
123
+
124
def fix_decimal_places_in_dict(in_dict:dict, decimal_places:int=2) -> dict:
    """Rounds every float value found in a (possibly nested) dictionary.

    Values that are lists or tuples are handled by `fix_decimal_places_in_list()`
    (tuples come back as lists), nested dictionaries are processed recursively,
    and any other value is copied unchanged. Keys are never modified.

    Args:
        in_dict (dict): The dictionary to fix.
        decimal_places (int): The number of decimal places to round float values to.

    Returns:
        dict: a new dictionary with the float values rounded to the specified number of decimal places.
    """
    def _fix(value):
        if isinstance(value, float):
            return round(value, decimal_places)
        if isinstance(value, (list, tuple)):
            return fix_decimal_places_in_list(value, decimal_places)
        if isinstance(value, dict):
            return fix_decimal_places_in_dict(value, decimal_places)
        return value

    return {name: _fix(value) for name, value in in_dict.items()}
145
+
146
+
147
def convert_tril_to_symm(lower_triangle_values:list) -> np.ndarray:
    """Expands strictly-lower-triangle values into a symmetric full matrix.

    The values are placed in the order produced by `np.tril_indices(n, k=-1)`
    and the diagonal is left at zero. The two halves are combined with an
    element-wise maximum against a zero-initialized matrix, so negative input
    values end up as 0.

    Args:
        lower_triangle_values (list): list of lower triangle matrix values
            (diagonal excluded).

    Returns:
        np.ndarray: numpy array of a symmetric full matrix.
    """
    # n*(n-1)/2 off-diagonal values -> n rows
    size = math.ceil(math.sqrt(2 * len(lower_triangle_values)))
    full = np.zeros((size, size))
    rows, cols = np.tril_indices(size, k=-1)
    full[rows, cols] = lower_triangle_values
    return np.maximum(full, full.T)
161
+
162
+
163
def convert_triu_to_symm(upper_triangle_values:list) -> np.ndarray:
    """Expands strictly-upper-triangle values into a symmetric full matrix.

    The values are placed in the order produced by `np.triu_indices(n, k=1)`
    and the diagonal is left at zero. The two halves are combined with an
    element-wise maximum against a zero-initialized matrix, so negative input
    values end up as 0.

    Args:
        upper_triangle_values (list): list of upper triangle matrix values
            (diagonal excluded).

    Returns:
        np.ndarray: numpy array of a symmetric full matrix.
    """
    # n*(n-1)/2 off-diagonal values -> n rows
    size = math.ceil(math.sqrt(2 * len(upper_triangle_values)))
    full = np.zeros((size, size))
    rows, cols = np.triu_indices(size, k=1)
    full[rows, cols] = upper_triangle_values
    return np.maximum(full, full.T)
177
+
178
+
179
+ def _QT_diameter(rmsd_matrix:np.ndarray, A:list) -> float:
180
+ """A subroutine for `QT()` to returns the maximum pairwise distance.
181
+
182
+ Args:
183
+ rmsd_matrix (np.ndarray): numpy array of rmsd.
184
+ A (list): list of indexes.
185
+
186
+ Returns:
187
+ float: maximum pairwise distance.
188
+ """
189
+ return np.max(rmsd_matrix[A][:,A])
190
+
191
+
192
+ def _QT_clustering(rmsd_matrix:np.ndarray, G:set, threshold:float, clusters:list) -> list:
193
+ """A subroutine for `QT()` to perform QTC algorithm.
194
+
195
+ Args:
196
+ rmsd_matrix (np.ndarray): pairwise rmsd matrix.
197
+ G (set): set of indexes used for recursive calling.
198
+ threshold (float): quality threshold (A).
199
+ clusters (list): list of clusters used for recursive calling.
200
+
201
+ Returns:
202
+ list: a list of final clusters.
203
+ """
204
+
205
+ if len(G) <= 1:
206
+ clusters.append(G)
207
+ return
208
+ C = [] # cluster candidates
209
+ for i in G:
210
+ flag = True
211
+ A = [i]
212
+ A_diameter = 0.0 # max of pairwise distances
213
+ while flag and A != G:
214
+ # find j that minimize diameter of A + [j]
215
+ diameters = [(_QT_diameter(rmsd_matrix, A + [j]), j) for j in G if j not in A]
216
+ if len(diameters) == 0:
217
+ flag = False
218
+ else:
219
+ (min_diameter, min_j) = min(diameters, key=lambda x: x[0])
220
+ if min_diameter > threshold:
221
+ flag = False
222
+ else:
223
+ A += [min_j]
224
+ A_diameter = min_diameter
225
+ C.append((A, A_diameter))
226
+ C = sorted(C, key=lambda x: (len(x[0]), -x[1]), reverse=True)
227
+ # if cardinality of C is tied, smaller diameter is picked
228
+ largest_C = set(C[0][0])
229
+ clusters.append(largest_C)
230
+ _QT_clustering(rmsd_matrix, G-largest_C, threshold, clusters)
231
+
232
+
233
def QT(rmsd_matrix:np.ndarray, threshold:float) -> tuple:
    """Perform QT (quality threshold) clustering.

    Clusters are numbered in the order in which `_QT_clustering()` finds them.
    The centroid indices are whatever `centroid_medoid()` returns for the
    resulting assignment and the rmsd matrix.

    Args:
        rmsd_matrix (np.ndarray): pairwise rmsd matrix.
        threshold (float): quality threshold passed on to `_QT_clustering()`.

    Returns:
        tuple: (cluster assignment, centroid indices)
    """
    n_items = rmsd_matrix.shape[0]
    found = []
    _QT_clustering(rmsd_matrix, set(range(n_items)), threshold, found)
    # ex. found = [{6, 7, 11}, {4, 5, 8}, {0}, {1}, {10}, {9}, {2}, {3}]
    labels = [None] * n_items
    for label, members in enumerate(found):
        for item in members:
            labels[item] = label
    medoids = centroid_medoid(labels, rmsd_matrix)
    return labels, medoids
253
+
254
+
255
def rdmol_to_graph(rdmol:Chem.Mol) -> nx.Graph:
    """Converts rdkit.Chem.Mol to a networkx graph object.

    Every atom becomes a node keyed by its 0-based atom index and carrying the
    attributes `atomic_num`, `formal_charge`, `chiral_tag`, `hybridization`,
    `num_explicit_hs` and `is_aromatic`. Every bond becomes an edge with a
    `bond_type` attribute.

    Args:
        rdmol (Chem.Mol): input molecule.

    Returns:
        nx.Graph: networkx graph object.
    """
    graph = nx.Graph()
    for atom in rdmol.GetAtoms():
        node_attrs = {
            'atomic_num': atom.GetAtomicNum(),
            'formal_charge': atom.GetFormalCharge(),
            'chiral_tag': atom.GetChiralTag(),
            'hybridization': atom.GetHybridization(),
            'num_explicit_hs': atom.GetNumExplicitHs(),
            'is_aromatic': atom.GetIsAromatic(),
        }
        graph.add_node(atom.GetIdx(), **node_attrs)
    for bond in rdmol.GetBonds():
        begin = bond.GetBeginAtomIdx()
        end = bond.GetEndAtomIdx()
        graph.add_edge(begin, end, bond_type=bond.GetBondType())
    return graph
278
+
279
+
280
def rdmol_to_graph_(rdmol:Chem.Mol) -> nx.Graph:
    """Converts rdkit.Chem.Mol to a networkx graph object (another implementation).

    The graph is built from the bond-order adjacency matrix of the molecule.
    Before conversion, each diagonal element is overwritten with the atom's
    atomic number plus its formal charge.

    Args:
        rdmol (Chem.Mol): input molecule.

    Returns:
        nx.Graph: networkx graph object.
    """
    # useBO=True puts bond orders (instead of 0/1) in the off-diagonal entries, e.g.
    # [[0. 1. 0. 0. 0. 0. 0. 0. 0.]
    #  [1. 0. 1. 1. 1. 0. 0. 0. 0.]
    #  [0. 1. 0. 0. 0. 0. 0. 0. 0.]
    #  [0. 1. 0. 0. 0. 0. 0. 0. 0.]
    #  [0. 1. 0. 0. 0. 1. 0. 1. 0.]
    #  [0. 0. 0. 0. 1. 0. 2. 0. 0.]
    #  [0. 0. 0. 0. 0. 2. 0. 0. 0.]
    #  [0. 0. 0. 0. 1. 0. 0. 0. 2.]
    #  [0. 0. 0. 0. 0. 0. 0. 2. 0.]]
    matrix = Chem.GetAdjacencyMatrix(rdmol, useBO=True)
    for i, atom in enumerate(rdmol.GetAtoms()):
        # adding a zero formal charge leaves the plain atomic number
        matrix[i, i] = atom.GetAtomicNum() + atom.GetFormalCharge()
    return nx.from_numpy_array(matrix)
310
+
311
+
312
def graph_to_rdmol(G:nx.Graph) -> Chem.Mol:
    """Converts a networkx graph object to rdkit.Chem.Mol object.

    Every node must carry the attributes `atomic_num`, `chiral_tag`,
    `formal_charge`, `is_aromatic`, `hybridization` and `num_explicit_hs`, and
    every edge a `bond_type` attribute (the layout written by
    `rdmol_to_graph()`); a missing attribute raises KeyError. The assembled
    molecule is sanitized before it is returned.

    Args:
        G (nx.Graph): a networkx graph.

    Returns:
        Chem.Mol: rdkit.Chem.Mol object.
    """
    attr_names = ('atomic_num', 'chiral_tag', 'formal_charge',
                  'is_aromatic', 'hybridization', 'num_explicit_hs')
    node_attrs = {name: nx.get_node_attributes(G, name) for name in attr_names}

    editable = Chem.RWMol()
    atom_index = {} # graph node -> atom index in the new molecule
    for node in G.nodes():
        atom = Chem.Atom(node_attrs['atomic_num'][node])
        atom.SetChiralTag(node_attrs['chiral_tag'][node])
        atom.SetFormalCharge(node_attrs['formal_charge'][node])
        atom.SetIsAromatic(node_attrs['is_aromatic'][node])
        atom.SetHybridization(node_attrs['hybridization'][node])
        atom.SetNumExplicitHs(node_attrs['num_explicit_hs'][node])
        atom_index[node] = editable.AddAtom(atom)

    bond_types = nx.get_edge_attributes(G, 'bond_type')
    for u, v in G.edges():
        editable.AddBond(atom_index[u], atom_index[v], bond_types[u, v])

    Chem.SanitizeMol(editable)
    return editable
347
+
348
+
349
def mae_rd_index(mol_dict:dict, smiles:str) -> dict:
    """Returns a map for atom indexes between a rdkit.Chem.Mol and a maestro file.

    Two graphs are built: one from the maestro atom/bond tables (nodes numbered
    from 1) and one from the SMILES with explicit hydrogens added (nodes
    numbered from 0). They are matched with networkx's `vf2pp_isomorphism()`
    using the atomic number as node label. Bond orders are stored on the edges
    but are not used for matching.

    Args:
        mol_dict (dict): a dictionary generated from a maestro file. It must
            provide `['f_m_ct']['m_atom']['i_m_atomic_number']` and the
            `i_m_from`, `i_m_to`, `i_m_order` columns of `['f_m_ct']['m_bond']`.
        smiles (str): SMILES of the molecule.

    Returns:
        dict: a map for atom indexes (maestro -> rdkit.Chem.Mol), or an empty
            dictionary if the two graphs cannot be matched.
    """
    bond_order_map = {
        Chem.BondType.SINGLE : 1.0,
        Chem.BondType.DOUBLE : 2.0,
        Chem.BondType.TRIPLE : 3.0,
        Chem.BondType.AROMATIC : 1.5,
        Chem.BondType.UNSPECIFIED : 0.0,
    }

    # maestro graph (1-based atom indexes)
    G = nx.Graph()
    for idx, atomic_num in enumerate(mol_dict['f_m_ct']['m_atom']['i_m_atomic_number'], start=1):
        G.add_node(idx, atomic_num=int(atomic_num))
    for (bond_from, bond_to, bond_order) in zip(mol_dict['f_m_ct']['m_bond']['i_m_from'],
                                                mol_dict['f_m_ct']['m_bond']['i_m_to'],
                                                mol_dict['f_m_ct']['m_bond']['i_m_order']):
        G.add_edge(int(bond_from), int(bond_to), bond_order=int(bond_order))

    # rdkit graph (0-based atom indexes, hydrogens made explicit)
    H = nx.Graph()
    rdmol = Chem.MolFromSmiles(smiles)
    rdmol = Chem.AddHs(rdmol)
    for atom in rdmol.GetAtoms():
        H.add_node(atom.GetIdx(), atomic_num=atom.GetAtomicNum())
    for bond in rdmol.GetBonds():
        H.add_edge(bond.GetBeginAtomIdx(),
                   bond.GetEndAtomIdx(),
                   bond_order=bond_order_map[bond.GetBondType()])

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not nx.is_isomorphic(G, H):
        return {}
    # vf2pp_isomorphism() returns None when the topology matches but no
    # mapping preserves the atomic numbers.
    mapping = nx.vf2pp_isomorphism(G, H, node_label="atomic_num")
    return {} if mapping is None else mapping
392
+
393
+
394
+ def _get_from_dict(dataDict:dict, mapList:list) -> None:
395
+ """A subroutine for `mae_to_dict()`.
396
+
397
+ Args:
398
+ dataDict (dict): data dictionary.
399
+ mapList (list): map list.
400
+ """
401
+ return reduce(operator.getitem, mapList, dataDict)
402
+
403
+
404
+ def _set_in_dict(dataDict:dict, mapList:list, value:Any) -> None:
405
+ """A subroutine for `mae_to_dict()`.
406
+
407
+ Args:
408
+ dataDict (dict): data dictionary.
409
+ mapList (list): map list.
410
+ value (Any): value to set.
411
+ """
412
+ if mapList:
413
+ _get_from_dict(dataDict, mapList[:-1])[mapList[-1]] = value
414
+ else:
415
+ for k,v in value.items():
416
+ dataDict[k] = v
417
+
418
+
419
+
420
+ def mae_to_dict(path:str | Path) -> dict:
421
+ """Converts Schrodinger Maestro file to a dictionary.
422
+
423
+ Args:
424
+ path (Union[str, Path]): filename or path to a .mae or .maegz file.
425
+
426
+ Returns:
427
+ dict: python dictionary.
428
+ """
429
+ tokens = None
430
+ if isinstance(path, str):
431
+ path = Path(path)
432
+ if path.suffix == 'gz':
433
+ with gzip.open(path, "rt") as f:
434
+ tokens = shlex.split(f.read())
435
+ else:
436
+ with open(path, "r") as f:
437
+ tokens = shlex.split(f.read())
438
+ count = re.compile(r'(\w+)\[(\d+)\]')
439
+ DATA = []
440
+ level = []
441
+ data = {}
442
+ previous_token = None
443
+ header = False
444
+ extra_column = 0
445
+ num_repeat = 1
446
+ skip = False
447
+ for token in tokens :
448
+ if token == "#" :
449
+ skip = not skip # invert
450
+ continue
451
+ elif skip:
452
+ continue
453
+ elif token == "{" :
454
+ header = True
455
+ key = []
456
+ val = []
457
+ arr = []
458
+ if previous_token:
459
+ if previous_token == "f_m_ct" and data:
460
+ DATA.append(data)
461
+ data = {}
462
+ try:
463
+ (block, num_repeat) = count.findall(previous_token)[0]
464
+ num_repeat = int(num_repeat)
465
+ extra_column = 1
466
+ except:
467
+ block = previous_token
468
+ num_repeat = 1
469
+ extra_column = 0
470
+ level.append(block)
471
+
472
+ elif token == "}":
473
+ if level:
474
+ level.pop()
475
+ elif token == ":::":
476
+ header = False
477
+ elif header:
478
+ key.append(token)
479
+ else:
480
+ val.append(token)
481
+ # only store f_m_ct blocks (level != [])
482
+ if len(val) == (len(key)+extra_column) and level :
483
+ arr.append(val[extra_column:])
484
+ val = []
485
+ if len(arr) == num_repeat:
486
+ if len(arr) == 1:
487
+ _set_in_dict(data, level, dict(zip(key,arr[0])))
488
+ else:
489
+ T = list(zip(*arr)) # transpose
490
+ _set_in_dict(data, level, dict(zip(key,T)))
491
+ previous_token = token
492
+ if data:
493
+ DATA.append(data)
494
+
495
+ return DATA