assemblycfg 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- assemblycfg/__init__.py +20 -0
- assemblycfg/cfg_ai.py +421 -0
- assemblycfg/det.py +707 -0
- assemblycfg/utils.py +572 -0
- assemblycfg-1.2.2.dist-info/METADATA +70 -0
- assemblycfg-1.2.2.dist-info/RECORD +14 -0
- assemblycfg-1.2.2.dist-info/WHEEL +5 -0
- assemblycfg-1.2.2.dist-info/licenses/LICENSE +21 -0
- assemblycfg-1.2.2.dist-info/top_level.txt +3 -0
- examples/example.py +31 -0
- examples/lipids.py +238 -0
- tests/__init__.py +0 -0
- tests/test_det.py +254 -0
- tests/test_general.py +138 -0
assemblycfg/__init__.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from .cfg_ai import (ai_core, repair_with_pathways)
|
|
2
|
+
|
|
3
|
+
from .det import (calculate_assembly_path_det)
|
|
4
|
+
|
|
5
|
+
from .utils import (safe_standardize_mol,
|
|
6
|
+
smi_to_mol,
|
|
7
|
+
smi_to_nx,
|
|
8
|
+
mol_to_nx,
|
|
9
|
+
remove_hydrogen_from_graph,
|
|
10
|
+
bond_order_rdkit_to_int,
|
|
11
|
+
get_disconnected_subgraphs,
|
|
12
|
+
molfile_to_mol,
|
|
13
|
+
standardize_mol,
|
|
14
|
+
nx_to_dict,
|
|
15
|
+
dict_to_nx,
|
|
16
|
+
mol2graph,
|
|
17
|
+
print_graph_dict,
|
|
18
|
+
print_virtual_objects)
|
|
19
|
+
|
|
20
|
+
# Keep in sync with the released distribution version (assemblycfg-1.2.2.dist-info).
__version__ = "1.2.2"
|
assemblycfg/cfg_ai.py
ADDED
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
import string
|
|
3
|
+
from typing import List, Tuple, Dict, Union
|
|
4
|
+
|
|
5
|
+
import networkx as nx
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def rules_to_graph(rules: List[str],
                   virt_obj: List[str]) -> nx.DiGraph:
    """
    Build a directed "join" graph from textual rules and virtual objects.

    Every rule of the form ``"A + B = C"`` contributes the two edges ``A -> C``
    and ``B -> C``. The virtual objects are inserted as nodes first, so objects
    that never occur in a rule remain in the graph as isolated nodes.

    Parameters
    ----------
    rules : list of str
        Join rules written with the literal separators ``' + '`` and ``' = '``
        (e.g. ``"A + B = C"``). An empty list yields a graph that only holds
        the ``virt_obj`` nodes.
    virt_obj : list of str
        Identifiers to add as nodes. Identifiers that also occur in a rule are
        not duplicated.

    Returns
    -------
    networkx.DiGraph
        Graph whose nodes are all operands, results and virtual objects, with
        one edge from each operand of a rule to that rule's result.

    Raises
    ------
    ValueError
        If a rule does not split into exactly three parts (two operands and one
        result) on the expected separators.
    """
    join_graph = nx.DiGraph()
    join_graph.add_nodes_from(virt_obj)

    for rule in rules:
        # Normalise both separators to " = " so a single split yields the
        # three parts; the unpacking fails for malformed rules.
        left, right, result = rule.replace(" + ", " = ").split(" = ")
        join_graph.add_edges_from(((left, result), (right, result)))

    return join_graph
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def repair(s: Union[str, list[str]]) -> Tuple[List[List[str]], Dict[str, List[str]]]:
    """
    Iteratively replace the most frequent adjacent symbol pair with a new non-terminal.

    The input is turned into one symbol sequence per word (one word for a plain
    string, one per element for a list of strings). Adjacent pairs are counted
    across all words; while some pair occurs more than once, the most frequent
    pair (the first one encountered on ties) is replaced everywhere by a freshly
    introduced non-terminal ``A1``, ``A2``, ... The caller's input is not
    modified.

    Parameters
    ----------
    s : str or list of str
        Input word or words. Every character is treated as one terminal symbol
        and must be a lowercase ASCII letter.

    Returns
    -------
    symbols : list of list of str
        One symbol sequence per input word after all replacements. Contains
        original terminals and introduced non-terminals (strings like ``'A1'``).
    productions : dict
        Mapping from each introduced non-terminal to the two-symbol list it
        replaces, e.g. ``{'A1': ['a', 'b']}``.

    Raises
    ------
    TypeError
        If ``s`` is a non-string iterable containing a non-string element, or
        is not iterable at all.
    AssertionError
        If any character is not a lowercase ASCII letter.
    """
    if isinstance(s, str):
        symbols: List[List[str]] = [list(s)]
    else:
        if not all(isinstance(subs, str) for subs in s):
            raise TypeError("Input must be a string or a list of strings.")
        symbols = [list(subs) for subs in s]

    # Input safety check. Raised explicitly instead of using ``assert`` so the
    # validation is not stripped under ``python -O``; the exception type is
    # kept for callers that already catch AssertionError.
    allowed = set(string.ascii_lowercase)
    for word in symbols:
        for char in word:
            if char not in allowed:
                raise AssertionError("Input string must consist of lowercase ASCII characters only.")

    productions: Dict[str, List[str]] = {}
    non_terminal_counter: int = 1

    while True:
        # Count adjacent pairs over all words and keep those seen more than once.
        pair_counts = collections.Counter()
        for subs in symbols:
            pair_counts.update(zip(subs, subs[1:]))

        frequent_pairs = {pair: count for pair, count in pair_counts.items() if count > 1}
        if not frequent_pairs:
            break

        # max() returns the first maximal key, so ties resolve to the pair
        # that was encountered first.
        most_frequent_pair = max(frequent_pairs, key=frequent_pairs.get)
        new_non_terminal = f'A{non_terminal_counter}'
        non_terminal_counter += 1
        productions[new_non_terminal] = list(most_frequent_pair)

        for subs in symbols:
            i = 0
            while i < len(subs) - 1:
                if (subs[i], subs[i + 1]) == most_frequent_pair:
                    # Replace in place. The new symbol is fresh, so it can never
                    # be part of the pair being replaced and scanning can simply
                    # continue with the next position.
                    subs[i:i + 2] = [new_non_terminal]
                i += 1

    return symbols, productions
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def convert_to_cnf(start_symbols: Union[str, List[str]],
                   productions: Dict[str, List[str]]) -> Tuple[str, Dict[str, List[str]]]:
    """
    Convert start words and productions into a binarised, CNF-style grammar.

    The conversion:
    - maps every lowercase ASCII terminal ``x`` to a non-terminal ``T_x`` with
      the production ``T_x -> x``;
    - breaks right-hand sides longer than two symbols into binary rules using
      fresh non-terminals ``N<n>`` (folding from the left);
    - emits one start production ``S_<idx>`` per start word, binarised the
      same way.

    Parameters
    ----------
    start_symbols : Union[str, List[str]]
        Either a single start word given as a string (each character is one
        symbol), or a list of start words where every word is itself a
        sequence of symbols (a string of single-character symbols or a list of
        symbol strings).
    productions : dict
        Mapping from non-terminal to its right-hand side as a list of symbols.
        Single lowercase ASCII letters are treated as terminals; every other
        symbol is treated as a non-terminal.

    Returns
    -------
    start_nt : str
        Name of the start non-terminal of the last start word
        (``'S_<len(start_symbols) - 1>'``).
    cnf_productions : dict
        Mapping from non-terminal to a right-hand side of at most two symbols.
        Includes the terminal productions (``'T_a': ['a']``), the introduced
        ``N<n>`` helpers, the converted input productions and one ``S_<idx>``
        entry per start word.

    Raises
    ------
    ValueError
        If ``start_symbols`` contains no start word.
    """
    if isinstance(start_symbols, str):
        # A plain string is ONE start word whose characters are the symbols
        # (same convention as `repair`), not one start word per character.
        start_words: List[List[str]] = [list(start_symbols)]
    else:
        start_words = [list(word) for word in start_symbols]

    if not start_words:
        # Without a start word there is no start non-terminal to return.
        raise ValueError("start_symbols must contain at least one start word.")

    cnf_productions: Dict[str, List[str]] = {}
    new_nt_counter: int = 1

    # Collect terminals. Membership is tested against the set of letters so
    # that only single characters qualify (a substring test against the
    # alphabet would also accept "" or "abc").
    lowercase = set(string.ascii_lowercase)
    terminals = {sym for rhs in productions.values() for sym in rhs if sym in lowercase}
    for word in start_words:
        terminals.update(sym for sym in word if sym in lowercase)

    terminal_map: Dict[str, str] = {t: f'T_{t}' for t in terminals}
    cnf_productions.update({nt: [t] for t, nt in terminal_map.items()})

    def binarize(symbols: List[str]) -> List[str]:
        """Replace terminals by their ``T_x`` names and fold to at most two symbols."""
        nonlocal new_nt_counter
        rhs = [terminal_map.get(sym, sym) for sym in symbols]
        while len(rhs) > 2:
            new_nt = f'N{new_nt_counter}'
            new_nt_counter += 1
            cnf_productions[new_nt] = rhs[:2]
            rhs = [new_nt] + rhs[2:]
        return rhs

    # Convert the given productions.
    for nt, expansion in productions.items():
        cnf_productions[nt] = binarize(expansion)

    # Emit one start production per start word.
    start_nt = ''
    for idx, word in enumerate(start_words):
        start_nt = 'S_' + str(idx)
        cnf_productions[start_nt] = binarize(word)

    return start_nt, cnf_productions
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def ai_core(s: Union[str, List[str]],
            debug: bool = False) -> Tuple[int, Dict[str, List[str]]]:
    """
    Compress the input with `repair`, convert the result to CNF and count rules.

    The input is first passed to `repair`, which returns the compressed start
    sequences and the introduced pair productions. Both are then handed to
    `convert_to_cnf`. The returned count is the number of CNF productions minus
    the number of distinct characters occurring in ``s``.

    Parameters
    ----------
    s : Union[str, List[str]]
        Input string, or list of strings, to be processed. Each character is
        treated as a terminal symbol by `repair`.
    debug : bool, optional
        If True, print the intermediate start sequences, the productions, the
        CNF productions and their sizes to stdout. Default is False.

    Returns
    -------
    ai_count : int
        ``len(cnf_productions) - k`` where ``k`` is the number of distinct
        characters in ``s``.
    cnf_productions : dict
        The production mapping returned by `convert_to_cnf`.

    Raises
    ------
    TypeError
        Propagated from `repair` when ``s`` is neither a string nor a list of
        strings. Other errors raised by `repair` or `convert_to_cnf` propagate
        unchanged.
    """
    start_symbols, productions = repair(s)
    # The start non-terminal name is not needed here, only the productions.
    _, cnf_productions = convert_to_cnf(start_symbols, productions)

    if debug:
        print(f"Start symbols: {start_symbols}", flush=True)
        print(f"Productions: {productions}", flush=True)
        print(f"Length of Productions: {len(productions)}", flush=True)
        print(f"CNF Productions: {cnf_productions}", flush=True)
        print(f"Length of CNF Productions: {len(cnf_productions)}", flush=True)

    # Distinct characters of the input. Iterating a string yields its
    # characters and iterating a list yields its words, so the nested loop
    # covers both input forms.
    distinct_terminals = {char for word in s for char in word}

    return len(cnf_productions) - len(distinct_terminals), cnf_productions
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def get_rules(s: Union[str, List[str]],
              production: Dict[str, List[str]],
              f_print: bool = False) -> List[str]:
    """
    Generate join rules from productions by performing a topological sort.

    Runs a Kahn-style traversal over the dependency graph in which every
    production key depends on the symbols of its right-hand side. While
    traversing, each symbol is mapped to the string it expands to, and a join
    rule ``"<left> + <right> = <result>"`` is emitted for every production with
    exactly two right-hand-side symbols.

    Parameters
    ----------
    s : Union[str, List[str]]
        Input string or list of strings. Its individual characters are
        registered as leaf symbols, in order of first appearance.
    production : dict
        Mapping from non-terminal to its right-hand side as a list of symbols.
        Right-hand sides are expected to have one or two symbols; entries with
        an empty right-hand side are ignored.
    f_print : bool, optional
        If True, print the input, the initial leaf symbols and the resulting
        rules to stdout. Default is False.

    Returns
    -------
    rules : list of str
        Join rules in an order in which both operands of a rule are available
        before the rule itself. Each rule is formatted as ``"A + B = C"`` where
        ``C`` is the concatenation of ``A`` and ``B``.

    Notes
    -----
    Symbols that are part of a dependency cycle are never processed, so the
    returned rules are incomplete for cyclic input. No error is raised.
    """
    in_degrees: Dict[str, int] = collections.defaultdict(int)
    adj: Dict[str, List[str]] = collections.defaultdict(list)
    tmap: Dict[str, str] = {}

    # Register every input CHARACTER as a leaf. Iterating a string yields
    # characters directly; for a list of words the inner loop is what reaches
    # them (seeding whole words would leave the terminals unqueued and produce
    # no rules at all).
    for word in s:
        for char in word:
            in_degrees[char] = 0

    for course, prereqs in production.items():
        for req in prereqs:
            # Make sure leaves that only occur on a right-hand side are known.
            in_degrees.setdefault(req, 0)
            in_degrees[course] += 1
            adj[req].append(course)

    # Start queue with symbols having zero in-degrees
    start_q: collections.deque[str] = collections.deque(
        symbol for symbol, ins in in_degrees.items() if ins == 0
    )
    if f_print:
        print(f"Processing {s}", flush=True)
        print(f"Start symbols: {', '.join(start_q)}", flush=True)
        print("Joins:", flush=True)

    # Perform topological sort
    rules: List[str] = []
    while start_q:
        symbol = start_q.popleft()
        tmap[symbol] = symbol
        for neighbor in adj[symbol]:
            in_degrees[neighbor] -= 1
            if in_degrees[neighbor] == 0:
                start_q.append(neighbor)

        expansion = production.get(symbol)
        if not expansion:
            # Leaf symbol (or empty right-hand side): it expands to itself.
            continue

        if len(expansion) == 2:
            a, b = expansion
            tmap[symbol] = tmap[a] + tmap[b]
            rules.append(f"{tmap[a]} + {tmap[b]} = {tmap[symbol]}")
        else:
            # Unit production: inherit the expanded string of the child, not
            # the child's symbol name.
            tmap[symbol] = tmap[expansion[0]]

    if f_print:
        print("\n".join(rules), flush=True)

    return rules
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def extract_virtual_objects(rules: List[str]) -> List[str]:
    """
    Extract virtual objects from a list of join rules, excluding the final rule's result.

    Parameters
    ----------
    rules : list of str
        Sequence of join rules formatted as ``"A + B = C"``. An empty sequence
        results in an empty list.

    Returns
    -------
    virt_objs : list of str
        Unique object identifiers found in the rules (operands and results),
        without the result of the last rule. Sorted by string length
        (ascending); identifiers of equal length are ordered alphabetically so
        the output is the same on every run.
    """
    if not rules:
        return []

    # Normalise both separators to " = " so one split yields every object.
    objects = {obj for rule in rules for obj in rule.replace(" + ", " = ").split(" = ")}

    # The result of the last rule is the final object, not a virtual one.
    last_result = rules[-1].split(" = ")[-1]
    objects.discard(last_result)

    # Sorting a set by length alone leaves ties in hash-dependent order, which
    # varies between interpreter runs; the secondary key makes it stable.
    return sorted(objects, key=lambda obj: (len(obj), obj))
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def repair_with_pathways(s: Union[str, List[str]], f_print: bool = False, debug: bool = False) -> Tuple[
    int, List[str], nx.DiGraph]:
    """
    Derive the path length, the virtual objects and the join graph for an input.

    Chains the module's building blocks: `ai_core` yields the path length and
    the CNF productions, `get_rules` turns those productions into join rules,
    `extract_virtual_objects` collects the objects named by the rules and
    `rules_to_graph` builds the directed graph of the joins.

    Parameters
    ----------
    s : Union[str, List[str]]
        Input string or list of strings to be processed.
    f_print : bool, optional
        Forwarded to `get_rules`; if True, its diagnostics and the computed
        rules are printed. Default is False.
    debug : bool, optional
        Forwarded to `ai_core`; if True, its intermediate values are printed.
        Default is False.

    Returns
    -------
    ai_count : int
        The count returned by `ai_core`.
    virt_obj : list of str
        The list returned by `extract_virtual_objects` for the join rules.
    rules_graph : networkx.DiGraph
        The graph returned by `rules_to_graph` for the join rules and the
        virtual objects.

    Raises
    ------
    Exception
        Nothing is raised here directly; errors from the called routines
        propagate unchanged.
    """
    # Stage 1: grammar compression and CNF conversion.
    path_len, productions = ai_core(s, debug=debug)

    # Stage 2: join rules, then the objects they mention.
    rules = get_rules(s, productions, f_print=f_print)
    virt_obj = extract_virtual_objects(rules)

    # Stage 3: graph view of the joins.
    rules_graph = rules_to_graph(rules, virt_obj)
    return path_len, virt_obj, rules_graph
|