hyperbase-parser-ab 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/CHANGELOG.md +14 -0
  2. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/PKG-INFO +3 -3
  3. hyperbase_parser_ab-0.3.0/VERSION +1 -0
  4. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/pyproject.toml +2 -2
  5. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/src/hyperbase_parser_ab/parser.py +134 -40
  6. hyperbase_parser_ab-0.3.0/src/hyperbase_parser_ab/repl.py +82 -0
  7. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/src/hyperbase_parser_ab/rules.py +2 -1
  8. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/tests/test_parser.py +85 -8
  9. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/tests/test_parser_helpers.py +2 -1
  10. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/tests/test_rules.py +1 -1
  11. hyperbase_parser_ab-0.2.0/VERSION +0 -1
  12. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/.github/workflows/publish.yml +0 -0
  13. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/.gitignore +0 -0
  14. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/.pre-commit-config.yaml +0 -0
  15. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/LICENSE +0 -0
  16. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/README.md +0 -0
  17. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/scripts/generate_alpha_training_data.py +0 -0
  18. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/scripts/train_atomizer.py +0 -0
  19. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/src/hyperbase_parser_ab/__init__.py +0 -0
  20. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/src/hyperbase_parser_ab/alpha.py +0 -0
  21. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/src/hyperbase_parser_ab/atomizer.py +0 -0
  22. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/src/hyperbase_parser_ab/lang_models.py +0 -0
  23. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/src/hyperbase_parser_ab/sentensizer.py +0 -0
  24. {hyperbase_parser_ab-0.2.0 → hyperbase_parser_ab-0.3.0}/tests/__init__.py +0 -0
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.3.0] - 11-04-2026
4
+
5
+ ### Added
6
+
7
+ - Maximum depth protection.
8
+ - Conjunction flattening.
9
+ - Show dependency parse tree on REPL.
10
+ - lang_namespace parameter, defaults to False (no language namespaces in atoms).
11
+
12
+ ### Changed
13
+
14
+ - Adopted new hyperbase API (0.10.0).
15
+ - Adopted REPL API.
16
+
3
17
  ## [0.2.0] - 05-04-2026
4
18
 
5
19
  ### Changed
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hyperbase-parser-ab
3
- Version: 0.2.0
4
- Summary: Semantic Hypergraph AlphaBeta Parser
3
+ Version: 0.3.0
4
+ Summary: Semantic Hypergraph Alpha-Beta Parser
5
5
  Project-URL: Homepage, https://hyperquest.ai/hyperbase
6
6
  Author-email: "Telmo Menezes et al." <telmo@telmomenezes.net>
7
7
  License-Expression: MIT
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3
15
15
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
16
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
17
17
  Requires-Python: >=3.10
18
- Requires-Dist: hyperbase>=0.9.0
18
+ Requires-Dist: hyperbase>=0.10.0
19
19
  Requires-Dist: pip
20
20
  Requires-Dist: scikit-learn>=1.3.0
21
21
  Requires-Dist: spacy>=3.8.0
@@ -0,0 +1 @@
1
+ 0.3.0
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "hyperbase-parser-ab"
3
3
  dynamic = ["version"]
4
- description = "Semantic Hypergraph AlphaBeta Parser"
4
+ description = "Semantic Hypergraph Alpha-Beta Parser"
5
5
  readme = "README.md"
6
6
  license = "MIT"
7
7
  requires-python = ">=3.10"
@@ -26,7 +26,7 @@ classifiers = [
26
26
  "Topic :: Scientific/Engineering :: Information Analysis",
27
27
  ]
28
28
  dependencies = [
29
- "hyperbase>=0.9.0",
29
+ "hyperbase>=0.10.0",
30
30
  "scikit-learn>=1.3.0",
31
31
  "spacy>=3.8.0",
32
32
  "torch>=2.0.0",
@@ -4,16 +4,16 @@ from typing import Any, cast
4
4
 
5
5
  import hyperbase.constants as const
6
6
  import spacy
7
+ from hyperbase.builders import build_atom, hedge
7
8
  from hyperbase.hyperedge import (
8
9
  Atom,
9
10
  Hyperedge,
10
11
  UniqueAtom,
11
- build_atom,
12
- hedge,
13
12
  non_unique,
14
13
  unique,
15
14
  )
16
15
  from hyperbase.parsers import Parser, ParseResult
16
+ from hyperbase.parsers.utils import edge_depth_exceeds
17
17
  from spacy.language import Language
18
18
  from spacy.tokens import Doc, Span, Token
19
19
 
@@ -165,22 +165,67 @@ def _generate_tok_pos(atom2word: dict[Atom, tuple[str, int]], edge: Hyperedge) -
165
165
 
166
166
 
167
167
  class AlphaBetaParser(Parser):
168
- def __init__(
169
- self,
170
- lang: str,
171
- beta: str = "repair",
172
- normalise: bool = True,
173
- post_process: bool = True,
174
- debug: bool = False,
175
- ) -> None:
176
- super().__init__()
177
-
178
- self.lang: str = lang
179
-
180
- if lang not in SPACY_MODELS:
181
- raise RuntimeError(f"Language code '{lang}' is not recognized.")
182
-
183
- models: list[str] = SPACY_MODELS[lang]
168
+ @classmethod
169
+ def accepted_params(cls) -> dict[str, dict[str, Any]]:
170
+ return {
171
+ **super().accepted_params(),
172
+ "lang": {
173
+ "type": str,
174
+ "default": None,
175
+ "description": "Language code (e.g. 'de', 'en', 'fr').",
176
+ "required": True,
177
+ },
178
+ "beta": {
179
+ "type": str,
180
+ "default": "repair",
181
+ "description": "Beta stage rules: 'strict' or 'repair'.",
182
+ "required": False,
183
+ },
184
+ "normalise": {
185
+ "type": bool,
186
+ "default": True,
187
+ "description": "Enable normalization of parsed edges.",
188
+ "required": False,
189
+ },
190
+ "post_process": {
191
+ "type": bool,
192
+ "default": True,
193
+ "description": "Enable post-processing of edges.",
194
+ "required": False,
195
+ },
196
+ "debug": {
197
+ "type": bool,
198
+ "default": False,
199
+ "description": "Enable debug message output.",
200
+ "required": False,
201
+ },
202
+ "lang_namespace": {
203
+ "type": bool,
204
+ "default": False,
205
+ "description": (
206
+ "Include the language code as a namespace in atoms "
207
+ "(e.g. 'apple/Cc/en' instead of 'apple/Cc')."
208
+ ),
209
+ "required": False,
210
+ },
211
+ }
212
+
213
+ def __init__(self, params: dict[str, Any] | None = None) -> None:
214
+ super().__init__(params)
215
+
216
+ self.lang: str = self.params["lang"]
217
+
218
+ if self.lang not in SPACY_MODELS:
219
+ raise RuntimeError(f"Language code '{self.lang}' is not recognized.")
220
+
221
+ beta: str = self.params.get("beta", "repair")
222
+ normalise: bool = self.params.get("normalise", True)
223
+ post_process: bool = self.params.get("post_process", True)
224
+ debug: bool = self.params.get("debug", False)
225
+ lang_namespace: bool = self.params.get("lang_namespace", False)
226
+ self.atom_lang: str = self.lang if lang_namespace else ""
227
+
228
+ models: list[str] = SPACY_MODELS[self.lang]
184
229
 
185
230
  self.nlp: Language | None = None
186
231
  for model in models:
@@ -191,8 +236,8 @@ class AlphaBetaParser(Parser):
191
236
  if self.nlp is None:
192
237
  models_list: str = ", ".join(models)
193
238
  raise RuntimeError(
194
- f"Language '{lang}' requires one of the following language models:\n"
195
- f"{models_list}."
239
+ f"Language '{self.lang}' requires one of the following "
240
+ f"language models:\n{models_list}."
196
241
  )
197
242
 
198
243
  self.alpha: Alpha = Alpha(use_atomizer=True)
@@ -202,7 +247,7 @@ class AlphaBetaParser(Parser):
202
247
  elif beta == "repair":
203
248
  self.rules = repair_rules
204
249
  else:
205
- raise RuntimeError(f"unkown beta stage: {beta}")
250
+ raise RuntimeError(f"unknown beta stage: {beta}")
206
251
  self.normalise: bool = normalise
207
252
  self.post_process: bool = post_process
208
253
  self.debug: bool = debug
@@ -224,6 +269,11 @@ class AlphaBetaParser(Parser):
224
269
  if self.debug:
225
270
  print(msg)
226
271
 
272
+ def install_repl(self, session: object) -> None:
273
+ from hyperbase_parser_ab.repl import install
274
+
275
+ install(self, session)
276
+
227
277
  def parse_sentence(self, sentence: str) -> list[ParseResult]:
228
278
  # This runs spacy own sentensizer anyway...
229
279
 
@@ -264,6 +314,16 @@ class AlphaBetaParser(Parser):
264
314
  if result and len(result) == 1:
265
315
  edge = non_unique(result[0])
266
316
 
317
+ # Reject pathologically deep parses before they reach the
318
+ # recursive transforms below (which would otherwise blow the
319
+ # Python stack on inputs with extreme nesting).
320
+ if edge is not None and edge_depth_exceeds(edge, self.max_depth):
321
+ self.debug_msg(
322
+ f"Rejecting parse: edge depth exceeds max_depth="
323
+ f"{self.max_depth} for sentence: {sent!s}"
324
+ )
325
+ return None
326
+
267
327
  atom2word: dict[Atom, tuple[str, int]] = {}
268
328
  if edge:
269
329
  edge = self._apply_arg_roles(edge)
@@ -340,12 +400,12 @@ class AlphaBetaParser(Parser):
340
400
  # subject
341
401
  if dep in {"nsubj", "sb"}:
342
402
  return "s"
343
- # passive subject
403
+ # passive subject (becomes object)
344
404
  elif dep in {"nsubjpass", "nsubj:pass"}:
345
- return "p"
346
- # agent
405
+ return "o"
406
+ # agent (becomes subject)
347
407
  elif dep == "agent":
348
- return "a"
408
+ return "s"
349
409
  # object
350
410
  elif dep in {
351
411
  "obj",
@@ -361,17 +421,18 @@ class AlphaBetaParser(Parser):
361
421
  }:
362
422
  return "o"
363
423
  # indirect object
364
- elif dep in {"iobj", "dative", "obl:arg", "da"}:
365
- return "i"
366
- # specifier
367
- elif dep in {"advcl", "prep", "npadvmod", "advmod", "mo", "mnr"}:
424
+ elif dep in {"iobj", "dative", "obl:arg", "da"} or dep in {
425
+ "advcl",
426
+ "prep",
427
+ "npadvmod",
428
+ "advmod",
429
+ "mo",
430
+ "mnr",
431
+ }:
368
432
  return "x"
369
433
  # parataxis
370
- elif dep in {"parataxis", "par"}:
371
- return "t"
372
- # interjection
373
- elif dep in {"intj", "ng", "dm"}:
374
- return "j"
434
+ elif dep in {"parataxis", "par"} or dep in {"intj", "ng", "dm"}:
435
+ return "?"
375
436
  # clausal complement
376
437
  elif dep in {"xcomp", "ccomp", "oc"}:
377
438
  return "r"
@@ -451,7 +512,7 @@ class AlphaBetaParser(Parser):
451
512
  elif ent_type[0] == "M":
452
513
  atom = self._build_atom_modifier(token)
453
514
  else:
454
- atom = build_atom(text, et, self.lang)
515
+ atom = build_atom(text, et, self.atom_lang)
455
516
  return atom
456
517
 
457
518
  def _build_atom_predicate(
@@ -474,17 +535,25 @@ class AlphaBetaParser(Parser):
474
535
  else:
475
536
  ent_type = "Pd"
476
537
 
477
- return build_atom(text, ent_type, self.lang)
538
+ return build_atom(text, ent_type, self.atom_lang)
478
539
 
479
540
  def _build_atom_trigger(self, token: Token, ent_type: str) -> Atom:
480
541
  text: str = token.text.lower()
481
- et: str = "Tv" if _is_verb(token) else ent_type
482
- return build_atom(text, et, self.lang)
542
+
543
+ # indirect object
544
+ if token.dep_ in {"iobj", "dative", "obl:arg", "da"}:
545
+ et = "Ti"
546
+ elif _is_verb(token):
547
+ et = "Tv"
548
+ else:
549
+ et = ent_type
550
+
551
+ return build_atom(text, et, self.atom_lang)
483
552
 
484
553
  def _build_atom_modifier(self, token: Token) -> Atom:
485
554
  text: str = token.text.lower()
486
555
  et: str = "Mv" if _is_verb(token) else _modifier_type_and_subtype(token)
487
- return build_atom(text, et, self.lang)
556
+ return build_atom(text, et, self.atom_lang)
488
557
 
489
558
  def _repair(self, edge: Hyperedge) -> Hyperedge:
490
559
  if edge.not_atom:
@@ -768,7 +837,7 @@ class AlphaBetaParser(Parser):
768
837
  if len(sequence) < 2:
769
838
  return sequence, False
770
839
 
771
- def sentensize(self, text: str) -> list[str]:
840
+ def get_sentences(self, text: str) -> list[str]:
772
841
  if self.nlp:
773
842
  doc: Doc = self.nlp(text.strip())
774
843
  return [str(sent).strip() for sent in doc.sents]
@@ -893,9 +962,34 @@ class AlphaBetaParser(Parser):
893
962
  return self._replace_argroles(edge, _ars)
894
963
  return edge
895
964
 
965
+ def _flatten_conjunctions(self, edge: Hyperedge) -> Hyperedge:
966
+ if edge.atom:
967
+ return edge
968
+ new_edge: Hyperedge = hedge(
969
+ [self._flatten_conjunctions(subedge) for subedge in edge]
970
+ )
971
+ if new_edge is None:
972
+ return edge
973
+ edge = new_edge
974
+ if edge[0].mt != "J":
975
+ return edge
976
+ connector: Hyperedge = edge[0]
977
+ flattened: list[Hyperedge] = [connector]
978
+ changed: bool = False
979
+ for subedge in edge[1:]:
980
+ if subedge.not_atom and len(subedge) >= 2 and subedge[0] == connector:
981
+ flattened.extend(list(subedge[1:]))
982
+ changed = True
983
+ else:
984
+ flattened.append(subedge)
985
+ if changed:
986
+ return hedge(flattened)
987
+ return edge
988
+
896
989
  def _post_process(self, edge: Hyperedge | None) -> Hyperedge | None:
897
990
  if edge is None:
898
991
  return None
899
992
  _edge: Hyperedge = self._fix_argroles(edge)
900
993
  _edge = self._process_colon_conjunctions(_edge)
994
+ _edge = self._flatten_conjunctions(_edge)
901
995
  return _edge
@@ -0,0 +1,82 @@
1
+ """REPL integration for the AlphaBeta parser.
2
+
3
+ Adds a pre-result hook to the Hyperbase REPL that prints the spaCy
4
+ dependency parse tree for the current sentence. Imported lazily from
5
+ :meth:`AlphaBetaParser.install_repl` so that this module's only purpose
6
+ is keeping REPL-rendering code out of the parser core.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import TYPE_CHECKING
12
+
13
+ from hyperbase.parsers.repl_api import PreResultHook, ReplContext
14
+ from rich import box
15
+ from rich.console import Console
16
+ from rich.panel import Panel
17
+ from rich.text import Text
18
+ from rich.tree import Tree
19
+ from spacy.tokens import Token
20
+
21
+ if TYPE_CHECKING:
22
+ from hyperbase_parser_ab.parser import AlphaBetaParser
23
+
24
+
25
+ def _build_dependency_tree(
26
+ token: Token,
27
+ visited: set[Token] | None = None,
28
+ ) -> Tree | None:
29
+ """Build a Rich tree representation of a spaCy dependency parse."""
30
+ if visited is None:
31
+ visited = set()
32
+
33
+ if token in visited:
34
+ return None
35
+ visited.add(token)
36
+
37
+ label = Text()
38
+ label.append(token.text, style="bold white")
39
+ label.append(" [", style="dim")
40
+ label.append(f"dep_={token.dep_}", style="cyan")
41
+ label.append(", ", style="dim")
42
+ label.append(f"tag_={token.pos_}", style="yellow")
43
+ label.append("]", style="dim")
44
+
45
+ tree = Tree(label)
46
+
47
+ for child in token.children:
48
+ child_tree = _build_dependency_tree(child, visited)
49
+ if child_tree:
50
+ tree.add(child_tree)
51
+
52
+ return tree
53
+
54
+
55
+ def _make_pre_result_hook(parser: AlphaBetaParser) -> PreResultHook:
56
+ """Return a pre-result hook bound to *parser*'s spaCy doc."""
57
+
58
+ def hook(ctx: ReplContext) -> None:
59
+ doc = getattr(parser, "doc", None)
60
+ if doc is None:
61
+ return
62
+ console: Console = ctx.session.console
63
+ for sent in doc.sents:
64
+ dep_tree = _build_dependency_tree(sent.root)
65
+ if dep_tree is None:
66
+ continue
67
+ console.print()
68
+ console.print(
69
+ Panel(
70
+ dep_tree,
71
+ title="[bold cyan]Dependency Parse Tree[/bold cyan]",
72
+ border_style="cyan",
73
+ box=box.ROUNDED,
74
+ )
75
+ )
76
+
77
+ return hook
78
+
79
+
80
+ def install(parser: AlphaBetaParser, session: object) -> None:
81
+ """Register AlphaBeta-specific REPL behavior on *session*."""
82
+ session.register_pre_result_hook(_make_pre_result_hook(parser)) # type: ignore[attr-defined]
@@ -1,4 +1,5 @@
1
- from hyperbase.hyperedge import Hyperedge, hedge
1
+ from hyperbase import hedge
2
+ from hyperbase.hyperedge import Hyperedge
2
3
 
3
4
 
4
5
  class Rule:
@@ -3,7 +3,8 @@
3
3
  from unittest.mock import MagicMock, patch
4
4
 
5
5
  import pytest
6
- from hyperbase.hyperedge import UniqueAtom, hedge
6
+ from hyperbase import hedge
7
+ from hyperbase.hyperedge import UniqueAtom
7
8
 
8
9
  from hyperbase_parser_ab.parser import AlphaBetaParser
9
10
 
@@ -11,7 +12,7 @@ from hyperbase_parser_ab.parser import AlphaBetaParser
11
12
  class TestParserInitErrors:
12
13
  def test_unsupported_language_raises(self):
13
14
  with pytest.raises(RuntimeError, match="not recognized"):
14
- AlphaBetaParser("xx")
15
+ AlphaBetaParser({"lang": "xx"})
15
16
 
16
17
  def test_unknown_beta_stage_raises(self):
17
18
  with (
@@ -21,9 +22,9 @@ class TestParserInitErrors:
21
22
  patch("spacy.util.is_package", return_value=True),
22
23
  patch("spacy.load", return_value=MagicMock()),
23
24
  patch("hyperbase_parser_ab.parser.Alpha"),
24
- pytest.raises(RuntimeError, match="unkown beta stage"),
25
+ pytest.raises(RuntimeError, match="unknown beta stage"),
25
26
  ):
26
- AlphaBetaParser("en", beta="invalid")
27
+ AlphaBetaParser({"lang": "en", "beta": "invalid"})
27
28
 
28
29
  def test_no_spacy_model_installed_raises(self):
29
30
  with (
@@ -34,7 +35,7 @@ class TestParserInitErrors:
34
35
  patch("hyperbase_parser_ab.parser.Alpha"),
35
36
  pytest.raises(RuntimeError, match="requires one of the following"),
36
37
  ):
37
- AlphaBetaParser("en")
38
+ AlphaBetaParser({"lang": "en"})
38
39
 
39
40
 
40
41
  def _make_parser(beta="repair"):
@@ -46,7 +47,13 @@ def _make_parser(beta="repair"):
46
47
  patch("hyperbase_parser_ab.parser.Alpha"),
47
48
  ):
48
49
  parser = AlphaBetaParser(
49
- "en", beta=beta, normalise=True, post_process=True, debug=False
50
+ {
51
+ "lang": "en",
52
+ "beta": beta,
53
+ "normalise": True,
54
+ "post_process": True,
55
+ "debug": False,
56
+ }
50
57
  )
51
58
  return parser
52
59
 
@@ -144,7 +151,7 @@ class TestParserRelationArgRole:
144
151
  parser.atom2token = {uatom: token}
145
152
  parser.orig_atom = {uatom: uatom}
146
153
  parser.depths = {uatom: 1}
147
- assert parser._relation_arg_role(edge) == "p"
154
+ assert parser._relation_arg_role(edge) == "o"
148
155
 
149
156
  def test_indirect_object(self):
150
157
  parser = _make_parser()
@@ -155,7 +162,7 @@ class TestParserRelationArgRole:
155
162
  parser.atom2token = {uatom: token}
156
163
  parser.orig_atom = {uatom: uatom}
157
164
  parser.depths = {uatom: 1}
158
- assert parser._relation_arg_role(edge) == "i"
165
+ assert parser._relation_arg_role(edge) == "x"
159
166
 
160
167
  def test_specifier(self):
161
168
  parser = _make_parser()
@@ -244,6 +251,76 @@ class TestParserDebug:
244
251
  assert capsys.readouterr().out == ""
245
252
 
246
253
 
254
+ class TestParserFlattenConjunctions:
255
+ def test_flatten_atom_unchanged(self):
256
+ parser = _make_parser()
257
+ atom = hedge("red/Ca/en")
258
+ assert parser._flatten_conjunctions(atom) == atom
259
+
260
+ def test_flatten_no_conjunction_unchanged(self):
261
+ parser = _make_parser()
262
+ edge = hedge("(runs/Pd/en cat/Cc/en dog/Cc/en)")
263
+ assert parser._flatten_conjunctions(edge) == edge
264
+
265
+ def test_flatten_simple_conjunction_unchanged(self):
266
+ """A flat conjunction with no nested conjunctions stays the same."""
267
+ parser = _make_parser()
268
+ edge = hedge("(,/J red/Ca/en green/Ca/en blue/Ca/en)")
269
+ assert parser._flatten_conjunctions(edge) == edge
270
+
271
+ def test_flatten_nested_same_connector(self):
272
+ """(,/J red (,/J green blue)) → (,/J red green blue)"""
273
+ parser = _make_parser()
274
+ edge = hedge("(,/J red/Ca/en (,/J green/Ca/en blue/Ca/en))")
275
+ expected = hedge("(,/J red/Ca/en green/Ca/en blue/Ca/en)")
276
+ assert parser._flatten_conjunctions(edge) == expected
277
+
278
+ def test_flatten_nested_different_connector_unchanged(self):
279
+ """Nested conjunction with a different connector should NOT be flattened."""
280
+ parser = _make_parser()
281
+ edge = hedge("(,/J red/Ca/en (and/J/en green/Ca/en blue/Ca/en))")
282
+ assert parser._flatten_conjunctions(edge) == edge
283
+
284
+ def test_flatten_recursive_bottom_up(self):
285
+ """Multiple levels of nesting should all collapse."""
286
+ parser = _make_parser()
287
+ edge = hedge("(,/J red/Ca/en (,/J green/Ca/en (,/J blue/Ca/en yellow/Ca/en)))")
288
+ expected = hedge("(,/J red/Ca/en green/Ca/en blue/Ca/en yellow/Ca/en)")
289
+ assert parser._flatten_conjunctions(edge) == expected
290
+
291
+ def test_flatten_multiple_nested_conjunctions(self):
292
+ """(,/J (,/J a b) (,/J c d)) → (,/J a b c d)"""
293
+ parser = _make_parser()
294
+ edge = hedge("(,/J (,/J a/Ca/en b/Ca/en) (,/J c/Ca/en d/Ca/en))")
295
+ expected = hedge("(,/J a/Ca/en b/Ca/en c/Ca/en d/Ca/en)")
296
+ assert parser._flatten_conjunctions(edge) == expected
297
+
298
+ def test_flatten_inside_outer_edge(self):
299
+ """A nested conjunction inside a non-conjunction outer edge is still
300
+ flattened bottom-up."""
301
+ parser = _make_parser()
302
+ edge = hedge(
303
+ "(runs/Pd/en cat/Cc/en (,/J red/Ca/en (,/J green/Ca/en blue/Ca/en)))"
304
+ )
305
+ expected = hedge(
306
+ "(runs/Pd/en cat/Cc/en (,/J red/Ca/en green/Ca/en blue/Ca/en))"
307
+ )
308
+ assert parser._flatten_conjunctions(edge) == expected
309
+
310
+ def test_flatten_mixed_connectors_partial(self):
311
+ """Only the matching nested conjunctions should be flattened."""
312
+ parser = _make_parser()
313
+ edge = hedge(
314
+ "(,/J red/Ca/en (,/J green/Ca/en blue/Ca/en)"
315
+ " (and/J/en yellow/Ca/en purple/Ca/en))"
316
+ )
317
+ expected = hedge(
318
+ "(,/J red/Ca/en green/Ca/en blue/Ca/en"
319
+ " (and/J/en yellow/Ca/en purple/Ca/en))"
320
+ )
321
+ assert parser._flatten_conjunctions(edge) == expected
322
+
323
+
247
324
  class TestParserReset:
248
325
  def test_reset_clears_state(self):
249
326
  parser = _make_parser()
@@ -2,7 +2,8 @@
2
2
 
3
3
  from unittest.mock import MagicMock
4
4
 
5
- from hyperbase.hyperedge import UniqueAtom, hedge
5
+ from hyperbase import hedge
6
+ from hyperbase.hyperedge import UniqueAtom
6
7
 
7
8
  from hyperbase_parser_ab.parser import (
8
9
  _builder_type_and_subtype,
@@ -1,4 +1,4 @@
1
- from hyperbase.hyperedge import hedge
1
+ from hyperbase import hedge
2
2
 
3
3
  from hyperbase_parser_ab.rules import Rule, apply_rule, repair_rules, strict_rules
4
4
 
@@ -1 +0,0 @@
1
- 0.2.0