hyperbase 0.9.0__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. {hyperbase-0.9.0 → hyperbase-0.10.0}/CHANGELOG.md +84 -1
  2. {hyperbase-0.9.0 → hyperbase-0.10.0}/PKG-INFO +1 -1
  3. hyperbase-0.10.0/VERSION +1 -0
  4. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/installation.md +3 -3
  5. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/parsers.md +23 -43
  6. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/readers.md +1 -1
  7. {hyperbase-0.9.0 → hyperbase-0.10.0}/pyproject.toml +1 -1
  8. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/__init__.py +1 -1
  9. hyperbase-0.10.0/src/hyperbase/builders.py +187 -0
  10. hyperbase-0.10.0/src/hyperbase/cli/__init__.py +202 -0
  11. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/cli/read.py +26 -14
  12. hyperbase-0.10.0/src/hyperbase/cli/repl.py +819 -0
  13. hyperbase-0.10.0/src/hyperbase/constants.py +105 -0
  14. hyperbase-0.10.0/src/hyperbase/correctness.py +150 -0
  15. hyperbase-0.10.0/src/hyperbase/hyperedge.py +634 -0
  16. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/loaders.py +5 -2
  17. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/parsers/__init__.py +12 -4
  18. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/parsers/parse_result.py +14 -2
  19. hyperbase-0.10.0/src/hyperbase/parsers/parser.py +175 -0
  20. hyperbase-0.10.0/src/hyperbase/parsers/repl_api.py +63 -0
  21. hyperbase-0.10.0/src/hyperbase/parsers/utils.py +44 -0
  22. hyperbase-0.10.0/src/hyperbase/patterns/__init__.py +10 -0
  23. hyperbase-0.10.0/src/hyperbase/patterns/checks.py +71 -0
  24. hyperbase-0.10.0/src/hyperbase/patterns/combine.py +353 -0
  25. hyperbase-0.10.0/src/hyperbase/patterns/matcher.py +643 -0
  26. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/readers/reader.py +4 -4
  27. hyperbase-0.10.0/src/hyperbase/transforms.py +155 -0
  28. hyperbase-0.10.0/tests/test_correctness_errors.py +275 -0
  29. {hyperbase-0.9.0 → hyperbase-0.10.0}/tests/test_hyperedge.py +3 -2
  30. {hyperbase-0.9.0 → hyperbase-0.10.0}/tests/test_hyperedge_text.py +1 -14
  31. {hyperbase-0.9.0 → hyperbase-0.10.0}/tests/test_load_edges.py +7 -8
  32. hyperbase-0.10.0/tests/test_malformed_input.py +178 -0
  33. hyperbase-0.10.0/tests/test_parser_plugin.py +196 -0
  34. {hyperbase-0.9.0 → hyperbase-0.10.0}/tests/test_patterns.py +179 -87
  35. hyperbase-0.10.0/tests/test_performance.py +149 -0
  36. hyperbase-0.10.0/tests/test_reader_plugin.py +266 -0
  37. hyperbase-0.9.0/VERSION +0 -1
  38. hyperbase-0.9.0/src/hyperbase/cli/__init__.py +0 -161
  39. hyperbase-0.9.0/src/hyperbase/cli/repl.py +0 -854
  40. hyperbase-0.9.0/src/hyperbase/constants.py +0 -4
  41. hyperbase-0.9.0/src/hyperbase/hyperedge.py +0 -1120
  42. hyperbase-0.9.0/src/hyperbase/parsers/correctness.py +0 -326
  43. hyperbase-0.9.0/src/hyperbase/parsers/parser.py +0 -88
  44. hyperbase-0.9.0/src/hyperbase/parsers/utils.py +0 -19
  45. hyperbase-0.9.0/src/hyperbase/patterns/__init__.py +0 -95
  46. hyperbase-0.9.0/src/hyperbase/patterns/argroles.py +0 -142
  47. hyperbase-0.9.0/src/hyperbase/patterns/atoms.py +0 -98
  48. hyperbase-0.9.0/src/hyperbase/patterns/common.py +0 -181
  49. hyperbase-0.9.0/src/hyperbase/patterns/matcher.py +0 -235
  50. hyperbase-0.9.0/src/hyperbase/patterns/merge.py +0 -58
  51. hyperbase-0.9.0/src/hyperbase/patterns/properties.py +0 -61
  52. hyperbase-0.9.0/src/hyperbase/patterns/utils.py +0 -118
  53. hyperbase-0.9.0/src/hyperbase/patterns/variables.py +0 -152
  54. hyperbase-0.9.0/tests/test_correctness.py +0 -360
  55. {hyperbase-0.9.0 → hyperbase-0.10.0}/.github/workflows/docs.yml +0 -0
  56. {hyperbase-0.9.0 → hyperbase-0.10.0}/.github/workflows/publish.yml +0 -0
  57. {hyperbase-0.9.0 → hyperbase-0.10.0}/.gitignore +0 -0
  58. {hyperbase-0.9.0 → hyperbase-0.10.0}/.pre-commit-config.yaml +0 -0
  59. {hyperbase-0.9.0 → hyperbase-0.10.0}/AUTHORS +0 -0
  60. {hyperbase-0.9.0 → hyperbase-0.10.0}/LICENSE +0 -0
  61. {hyperbase-0.9.0 → hyperbase-0.10.0}/README.md +0 -0
  62. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/atom-structure.png +0 -0
  63. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/authors/camille.jpg +0 -0
  64. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/authors/chih-chun.jpg +0 -0
  65. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/authors/kexinren.jpg +0 -0
  66. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/authors/max.jpg +0 -0
  67. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/authors/telmo.jpg +0 -0
  68. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/authors/yael-stein.jpg +0 -0
  69. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/block.png +0 -0
  70. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/favicon.png +0 -0
  71. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/graphbrain.png +0 -0
  72. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/hyper-vs-graph.png +0 -0
  73. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/hyperedge-blocks.png +0 -0
  74. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/logo_hyperquest_small_nav.svg +0 -0
  75. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/one-pagers/GB-A.pdf +0 -0
  76. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/one-pagers/GB-A_thumb.png +0 -0
  77. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/one-pagers/GB-C.pdf +0 -0
  78. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/one-pagers/GB-C_thumb.png +0 -0
  79. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/show.png +0 -0
  80. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/socsemics.png +0 -0
  81. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/vblock.png +0 -0
  82. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/stylesheets/extra.css +0 -0
  83. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/authors.md +0 -0
  84. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/index.md +0 -0
  85. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/api.md +0 -0
  86. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/discovering-patterns.md +0 -0
  87. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/hyperedges.md +0 -0
  88. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/notation.md +0 -0
  89. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/overview.md +0 -0
  90. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/patterns.md +0 -0
  91. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/pubs-cases.md +0 -0
  92. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/tutorials/parsing-a-sentence.md +0 -0
  93. {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/tutorials/playing-with-hyperedges.md +0 -0
  94. {hyperbase-0.9.0 → hyperbase-0.10.0}/mkdocs.yml +0 -0
  95. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/cli/parsers.py +0 -0
  96. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/data/__init__.py +0 -0
  97. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/data/wikipedia/discard_sections.txt +0 -0
  98. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/patterns/counter.py +0 -0
  99. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/readers/__init__.py +0 -0
  100. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/readers/txt.py +0 -0
  101. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/readers/url.py +0 -0
  102. {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/readers/wikipedia.py +0 -0
  103. {hyperbase-0.9.0 → hyperbase-0.10.0}/tests/__init__.py +0 -0
@@ -1,8 +1,39 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.10.0] - 11-04-2026
4
+
5
+ ### Added
6
+
7
+ - `[]` pattern notation for specifying sequences of arguments.
8
+ - EdgeType and ArgRole enums.
9
+ - safety cap for match (`_MAX_ARGROLE_ITEMS=10`) against pathological edge arities.
10
+ - caching of computed `Hyperedge`/`Atom` properties.
11
+ - `parse_to_jsonl` method on `Parser`.
12
+ - unified parameter interface for parsers.
13
+ - method `Parser.accepted_params`.
14
+ - maximum depth protection for parsers.
15
+ - repl api for parsers.
16
+
17
+ ### Changed
18
+
19
+ - multiple patterns functions are now `Hyperedge`/`Atom` methods: `is_wildcard`, `is_pattern`, `is_fun_pattern`, `is_variable`, `contains_variable`, `variable_name`.
20
+ - `hyperedge.py` now delegates to smaller modules with well-defined concerns: `builders.py`, `correctness.py`, `transforms.py`, `patterns/checks.py` and `patterns/matcher.py`.
21
+ - replaced `itertools.permutations` with constraint-propagated backtracking in argrole matcher.
22
+ - `parse_text` renamed to `parse`; old iterator-based `parse` removed.
23
+ - `read_source` renamed to `parse_source`; `read_source_to_jsonl` renamed to `parse_source_to_jsonl`.
24
+ - renamed `sentensize` to `get_sentences`.
25
+ - hedge now uses an explicit stack instead of recursion (so that pathologically
26
+ nested edge strings cannot exhaust Python's call stack).
27
+ - renamed parsers.correctness to parsers.badness.
28
+
29
+ ### Removed
30
+
31
+ - `__add__` operator overloading in `Hyperedge`/`Atom`.
32
+
3
33
  ## [0.9.0] - 05-04-2026
4
34
 
5
35
  ### Added
36
+
6
37
  - readers (txt, url, wikipedia).
7
38
  - cli interface with repl, parsers, readers.
8
39
  - hyperedge.Hyperedge.match function (calls parsers.match_pattern).
@@ -11,6 +42,7 @@
11
42
  - load_edges function.
12
43
 
13
44
  ### Changed
45
+
14
46
  - added get_parser to main functions (at hyperbase root).
15
47
  - improved documentation.
16
48
  - hedge now accepts ParseResults and can recursively add Hyperedge.text strings.
@@ -25,23 +57,27 @@
25
57
  - Renamed Hyperedge.normalized to normalise.
26
58
 
27
59
  ### Removed
60
+
28
61
  - function patterns.edge_matches_pattern.
29
62
  - deprecated and obsolete methods from Hyperedge: is_atom, to_str, roots, insert_first_argument, connect, sequence, contains_atom_type, main_concepts, replace_main_concept, has_argroles.
30
63
 
31
64
  ## [0.8.0] - 26-03-2026 - hyperbase is the successor of graphbrain
32
65
 
33
66
  ### Added
67
+
34
68
  - parser plugin foundation.
35
69
  - more comprehensive Hyperedge.check_correctness.
36
70
  - check parse correctness.
37
71
  - type checking: full code coverage.
38
72
 
39
73
  ### Changed
74
+
40
75
  - renamed library to hyperbase.
41
76
  - trimmed down library to the essentials: hyperedge, patterns and parser foundations.
42
77
  - converted documentation to Material for MkDocs.
43
78
 
44
79
  ### Removed
80
+
45
81
  - hypergraph module, hypergraph database (memory module).
46
82
  - alphabeta parser implementation.
47
83
  - old scripts, examples, processors.
@@ -51,7 +87,9 @@
51
87
  - obsolete constants.
52
88
 
53
89
  ## [0.7.0] - 05-03-2026
90
+
54
91
  ### Added
92
+
55
93
  - patterns.is_wildcard().
56
94
  - Base class hypergraph.memory.keyvalue.KeyValue for key-value hypergraph databases, removing redundant code between LevelDB and SQLite.
57
95
  - Tests for LevelDB (only the SQLite Hypergraph implementation was being directly tested).
@@ -63,6 +101,7 @@
63
101
  - Hypergraph.get_attributes().
64
102
 
65
103
  ### Changed
104
+
66
105
  - Entire project is now in pure Python
67
106
  - Python >=3.10 now required.
68
107
  - Hypergraph.search(), .match() and .count() now working with functional patterns and argument role matching.
@@ -72,19 +111,25 @@
72
111
  - Matches from patterns with repeated variables are collected in lists.
73
112
 
74
113
  ### Removed
114
+
75
115
  - graphbrain.logic obsolete module.
76
116
  - LevelDB backend
77
117
 
78
118
  ## [0.6.1] - 31-10-2022
119
+
79
120
  ### Changed
121
+
80
122
  - Hyperedge.replace_argroles(), .insert_argrole() and .add_argument() now work with functional patterns such as var.
81
123
  - Fixed bug when matching patterns containing atoms functional pattern where no atom has argroles.
82
124
 
83
125
  ### Removed
126
+
84
127
  - interactive_case_generator() from graphbrain.notebook.
85
128
 
86
129
  ## [0.6.0] - 27-10-2022
130
+
87
131
  ### Added
132
+
88
133
  - Hyperedge.atom and .not_atom properties.
89
134
  - Hyperedge.mtype() and .connector_mtype() methods.
90
135
  - Hyperedge.t, .mt, .ct and .cmt type shortcut properties.
@@ -99,6 +144,7 @@
99
144
  - Processor class.
100
145
 
101
146
  ### Changed
147
+
102
148
  - Coreference resolution now using the new spaCy experimental model.
103
149
  - Now using spaCy transformer GPU models by default, can fallback to CPU model.
104
150
  - Hyperedge.is_atom() deprecated.
@@ -112,6 +158,7 @@
112
158
  - Hyperedge.argroles() now also works at relation/concept level.
113
159
 
114
160
  ### Removed
161
+
115
162
  - graphbrain.patterns.normalize_edge().
116
163
  - graphbrain.stats obsolete package.
117
164
  - graphbrain.cognition obsolete package.
@@ -119,18 +166,22 @@
119
166
  - Hyperedge .predicate() and .predicate_atom().
120
167
 
121
168
  ## [0.5.0] - 28-07-2021
169
+
122
170
  ### Added
171
+
123
172
  - SQLite3 hypergraph database backend.
124
173
  - Hypergraph.add_with_attributes().
125
174
  - import and export commands.
126
175
  - Hypergraph context manager for batch writes (with hopen(hg_locator) as hg ...).
127
176
 
128
177
  ### Changed
178
+
129
179
  - Main hypergraph database backend is now SQLite3.
130
180
  - LevelDB backend becomes optional. (disabled by default)
131
181
  - Neuralcoref becomes optional. (disabled by default)
132
182
 
133
183
  ### Removed
184
+
134
185
  - Hypergraph.atom_count().
135
186
  - Hypergraph.edge_count().
136
187
  - Hypergraph.primary_atom_count().
@@ -139,21 +190,29 @@
139
190
  - corefs_unidecode agent.
140
191
 
141
192
  ## [0.4.3] - 22-04-2021
193
+
142
194
  ### Changed
195
+
143
196
  - Fixed AlphaBeta bug related to temporary atoms being removed too soon from atom2tokens.
144
197
  - Hypergraph.add_sequence() converts sequence name directly to atom.
145
198
  - Parser level coreference resolution (neuralcoref) disabled by default, requires dedicated build.
146
199
 
147
200
  ## [0.4.2] - 12-04-2021
201
+
148
202
  ### Changed
203
+
149
204
  - Solving wheel compilation issue.
150
205
 
151
206
  ## [0.4.1] - 07-04-2021
207
+
152
208
  ### Changed
209
+
153
210
  - Solving issue with inclusion of auxiliary data file in non-binary distributions.
154
211
 
155
212
  ## [0.4.0] - 07-04-2021
213
+
156
214
  ### Added
215
+
157
216
  - Agents system.
158
217
  - Conjunctions resolution agent.
159
218
  - Number agent (singular/plural relations) and related meaning.number module.
@@ -178,6 +237,7 @@
178
237
  - Utility functions to show colored edges in the terminal.
179
238
 
180
239
  ### Changed
240
+
181
241
  - Special characters in atoms are now percent-encoded.
182
242
  - parse() now returns a dictionary that includes inferred edges.
183
243
  - parse() now returns a dictionary of edges to text.
@@ -193,25 +253,32 @@
193
253
  - Hyperedge.replace_atom() optional unique argument.
194
254
 
195
255
  ### Removed
256
+
196
257
  - Meta-modifier hyperedge type.
197
258
  - Auxiliary, subpredicate and dependency hyperedge types.
198
259
  - Obsolete Hyperedge.nest() method.
199
260
 
200
261
  ## [0.3.2] - 10-02-2020
262
+
201
263
  ### Added
264
+
202
265
  - simplify_role() on Atom objects produces an atom with only its simple type as role.
203
266
 
204
267
  ### Changed
268
+
205
269
  - Lemmas are now based on atoms with simplified roles.
206
270
  - Improved actors agent (more accurate identification of actors, English only for now).
207
271
 
208
272
  ## [0.3.1] - 03-02-2020
273
+
209
274
  ### Added
275
+
210
276
  - German parser (experimental and incomplete).
211
277
  - Documentation.
212
278
  - Hyperedge sequences.
213
279
 
214
280
  ### Changed
281
+
215
282
  - Improved hyperedge visualization in notebooks.
216
283
  - Agents receive language and sequence.
217
284
  - txt_parser agent creates a sequence.
@@ -220,11 +287,14 @@
220
287
  - Improved conflict agent.
221
288
 
222
289
  ## [0.3.0] - 28-09-2019
290
+
223
291
  ### Added
292
+
224
293
  - Tests.
225
294
  - Documentation.
226
295
 
227
296
  ### Changed
297
+
228
298
  - Graphbrain is now beta (main APIs considered stable).
229
299
  - LevelDB edge attributes encoded in JSON.
230
300
  - Renamed hypergraph() to hgraph() and moved function to __init__.
@@ -237,23 +307,29 @@
237
307
  - Improved notebooks visualizations (show(), blocks(), vblocks()).
238
308
 
239
309
  ### Removed
310
+
240
311
  - graphbrain.funs module.
241
312
 
242
313
  ## [0.2.2] - 13-09-2019
243
314
 
244
315
  ### Added
316
+
245
317
  - txt_parser agent.
246
318
  - MANIFEST.in to include VERSION file in distribution.
247
319
 
248
320
  ### Changed
321
+
249
322
  - Fixing 'pip install graphbrain' on Linux/Windows.
250
323
 
251
324
  ## [0.2.1] - 04-09-2019
325
+
252
326
  ### Added
327
+
253
328
  - claim_actors and corefs_dets agents.
254
329
  - meaning.concepts module.
255
330
 
256
331
  ### Changed
332
+
257
333
  - Fixed example.
258
334
  - hypergraph.sum_degree() and .sum_deep_degree().
259
335
  - Parser improvements.
@@ -261,11 +337,14 @@
261
337
  - Improved docs.
262
338
 
263
339
  ### Removed
340
+
264
341
  - Obsolete 'work-in-progress' code.
265
342
  - hg2json command.
266
343
 
267
344
  ## [0.2.0] - 04-08-2019
345
+
268
346
  ### Added
347
+
269
348
  - Primary entities and deep degrees.
270
349
  - Hyperedges have their own class, deriving from tuple.
271
350
  - Atoms have a special class, deriving from Hyperedge.
@@ -273,11 +352,15 @@
273
352
  - Created agent system + first agents.
274
353
 
275
354
  ### Changed
355
+
276
356
  - Parsers now have own package.
277
357
 
278
358
  ### Removed
359
+
279
360
  - Old experimental code.
280
361
 
281
362
  ## [0.1.0] - 14-06-2019
363
+
282
364
  ### Added
283
- - First release.
365
+
366
+ - First release.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hyperbase
3
- Version: 0.9.0
3
+ Version: 0.10.0
4
4
  Summary: A foundational library for Semantic Hypergraphs
5
5
  Project-URL: Homepage, https://hyperquest.ai/hyperbase
6
6
  Author-email: "Telmo Menezes et al." <telmo@telmomenezes.net>
@@ -0,0 +1 @@
1
+ 0.10.0
@@ -124,11 +124,11 @@ uv run hyperbase parsers
124
124
  Once installed, parsers can be used from the interactive REPL:
125
125
 
126
126
  ```bash
127
- hyperbase repl --parser alphabeta --language en
127
+ hyperbase repl --parser alphabeta --lang en
128
128
  ```
129
129
 
130
130
  ```bash
131
- uv run hyperbase repl --parser alphabeta --language en
131
+ uv run hyperbase repl --parser alphabeta --lang en
132
132
  ```
133
133
 
134
134
  Or programmatically:
@@ -136,6 +136,6 @@ Or programmatically:
136
136
  ```python
137
137
  from hyperbase.parsers import get_parser
138
138
 
139
- parser = get_parser("alphabeta", language="en")
139
+ parser = get_parser("alphabeta", lang="en")
140
140
  result = parser.parse("The sky is blue.")
141
141
  ```
@@ -20,10 +20,10 @@ Parsers are obtained by name with `get_parser()`:
20
20
  ```python
21
21
  from hyperbase import get_parser
22
22
 
23
- parser = get_parser("alphabeta", language="en")
23
+ parser = get_parser("alphabeta", lang="en")
24
24
  ```
25
25
 
26
- The keyword arguments are forwarded to the parser constructor. Each parser plugin defines its own parameters -- for example, `alphabeta` takes a `language` code, while `generative` accepts `model_path`, `device`, `max_length`, and others.
26
+ The keyword arguments are forwarded to the parser constructor. Each parser plugin defines its own parameters -- for example, `alphabeta` takes a `lang` code, while `generative` accepts `model_path`, `device`, `max_length`, and others. Run `hyperbase repl --parser <name> --help` (or `hyperbase read --parser <name> --help`) to see the full set of CLI flags injected by the active plugin.
27
27
 
28
28
  To see which parsers are installed:
29
29
 
@@ -125,42 +125,7 @@ This is what `read_source_to_jsonl()` uses internally -- each line in the output
125
125
 
126
126
  ## Quality checking
127
127
 
128
- The `hyperbase.parsers.correctness` module provides functions to assess the quality of a parse result.
129
-
130
- ### Badness check
131
-
132
- `badness_check()` runs a comprehensive quality check on a parsed edge, combining structural validation with token-to-atom matching:
133
-
134
- ```python
135
- from hyperbase.parsers.correctness import badness_check
136
-
137
- errors = badness_check(result.edge, result.tokens)
138
- if errors:
139
- for key, error_list in errors.items():
140
- for code, message, severity in error_list:
141
- print(f"[{code}] {message} (severity: {severity})")
142
- else:
143
- print("No errors found.")
144
- ```
145
-
146
- The function returns a dictionary mapping edge fragments (or the string `'token-matching'`) to lists of `(code, message, severity)` tuples. An empty dictionary means no errors were found.
147
-
148
- The checks include:
149
-
150
- - **Structural correctness** -- validates the hyperedge against the SH specification (via `Hyperedge.check_correctness()`).
151
- - **Argument role validation** -- checks that argument roles are drawn from the valid set (`m`, `s`, `p`, `a`, `o`, `i`, `x`, `t`, `j`, `r`, `c`) and that roles like `s`, `p`, `o` are not duplicated.
152
- - **Junction consistency** -- verifies that junction arguments are consistently typed (all relations or all concepts).
153
- - **Token matching** -- ensures that every token in the original sentence maps to an atom root in the edge, and vice versa. Handles multi-token atoms, contractions and other non-trivial correspondences.
154
-
155
- ### Structural quality only
156
-
157
- For a lighter check that skips token matching:
158
-
159
- ```python
160
- from hyperbase.parsers.correctness import check_structural_quality
161
-
162
- errors = check_structural_quality(result.edge)
163
- ```
128
+ Badness/correctness checking lives in the parser plugin that needs it. The generative parser ships [`hyperbase_parser_gen.correctness.badness_check`](https://github.com/telmomenezes/hyperbase-parser-gen) for combined structural + token-matching validation; see that package's docs for usage.
164
129
 
165
130
  ## CLI
166
131
 
@@ -177,7 +142,7 @@ Shows all installed parser plugins and their entry point values.
177
142
  The REPL lets you parse sentences interactively:
178
143
 
179
144
  ```bash
180
- hyperbase repl --parser alphabeta --language en
145
+ hyperbase repl --parser alphabeta --lang en
181
146
  ```
182
147
 
183
148
  Inside the REPL, type a sentence to parse it. Use `/help` to see available commands, `/settings` to view current configuration, and `/set` to change settings on the fly (e.g. `/set parser generative`). The REPL caches parser instances, so switching between parsers is fast after the first load.
@@ -186,7 +151,7 @@ Inside the REPL, type a sentence to parse it. Use `/help` to see available comma
186
151
 
187
152
  ```bash
188
153
  # Parse a file to JSONL
189
- hyperbase read article.txt -o output.jsonl --parser alphabeta --language en
154
+ hyperbase read article.txt -o output.jsonl --parser alphabeta --lang en
190
155
 
191
156
  # Parse a Wikipedia article
192
157
  hyperbase read https://en.wikipedia.org/wiki/Hypergraph -o output.jsonl
@@ -196,10 +161,12 @@ See the [readers](readers.md) documentation for the full set of `hyperbase read`
196
161
 
197
162
  ## Custom parsers
198
163
 
199
- To create a custom parser, subclass `Parser` and implement two methods:
164
+ To create a custom parser, subclass `Parser` and implement:
200
165
 
201
- - `sentensize(text)` -- split a text string into a list of sentences.
166
+ - `__init__(params)` -- constructor accepting a dictionary of parser parameters.
167
+ - `get_sentences(text)` -- split a text string into a list of sentences.
202
168
  - `parse_sentence(sentence)` -- parse a single sentence and return a list of `ParseResult` objects.
169
+ - `accepted_params()` (classmethod) -- return a dict describing the parameters the parser accepts.
203
170
 
204
171
  Optionally, override `parse_batch(sentences)` if your parser can process multiple sentences more efficiently in a single call.
205
172
 
@@ -208,7 +175,20 @@ from hyperbase.parsers import Parser, ParseResult
208
175
  from hyperbase.hyperedge import hedge
209
176
 
210
177
  class MyParser(Parser):
211
- def sentensize(self, text):
178
+ @classmethod
179
+ def accepted_params(cls):
180
+ return {
181
+ "lang": {
182
+ "type": str, "default": None,
183
+ "description": "Language code.", "required": True,
184
+ },
185
+ }
186
+
187
+ def __init__(self, params=None):
188
+ super().__init__(params)
189
+ self.lang = self.params["lang"]
190
+
191
+ def get_sentences(self, text):
212
192
  # simple sentence splitting
213
193
  return [s.strip() for s in text.split('.') if s.strip()]
214
194
 
@@ -70,7 +70,7 @@ hyperbase read article.txt -o output.txt
70
70
  hyperbase read https://en.wikipedia.org/wiki/Hypergraph -o output.jsonl
71
71
 
72
72
  # Specify reader and parser explicitly
73
- hyperbase read source.txt -o output.jsonl --reader plain_text --parser alphabeta --language en
73
+ hyperbase read source.txt -o output.jsonl --reader plain_text --parser alphabeta --lang en
74
74
  ```
75
75
 
76
76
  ## Built-in readers
@@ -74,7 +74,7 @@ target-version = "py310"
74
74
  select = ["E", "F", "W", "I", "UP", "B", "SIM", "RUF", "Q", "C4", "PT", "N", "ANN"]
75
75
 
76
76
  [tool.ruff.lint.per-file-ignores]
77
- "tests/*" = ["E501", "ANN201", "D100", "D101", "D102", "D400", "D415"]
77
+ "tests/*" = ["ANN001", "ANN003", "ANN201", "ANN202", "ANN204", "ANN205", "D100", "D101", "D102", "D400", "D415", "E501", "N802", "PT011"]
78
78
 
79
79
  [tool.ruff.lint.flake8-quotes]
80
80
  inline-quotes = "double"
@@ -1,4 +1,4 @@
1
- from hyperbase.hyperedge import hedge
1
+ from hyperbase.builders import hedge
2
2
  from hyperbase.loaders import load_edges
3
3
  from hyperbase.parsers import get_parser
4
4
 
@@ -0,0 +1,187 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterable
4
+ from typing import Any, cast
5
+
6
+ from hyperbase.constants import ATOM_ENCODE_TABLE
7
+ from hyperbase.hyperedge import Atom, Hyperedge, UniqueAtom
8
+ from hyperbase.parsers.parse_result import ParseResult
9
+
10
+
11
def str_to_atom(s: str) -> str:
    """Return *s* lowercased and encoded for use as an atom.

    The lowercased text is run through ``ATOM_ENCODE_TABLE`` with
    ``str.translate``.
    """
    lowered = s.lower()
    return lowered.translate(ATOM_ENCODE_TABLE)
14
+
15
+
16
def _edge_str_has_outer_parens(edge_str: str) -> bool:
    """Tell whether *edge_str* opens with a parenthesis.

    Only the first character is inspected; the closing side is not
    checked here. Strings shorter than two characters are never
    considered parenthesised.
    """
    return len(edge_str) >= 2 and edge_str.startswith("(")
23
+
24
+
25
def split_edge_str(edge_str: str) -> tuple[str, ...]:
    """Split an edge string (given without its outer parentheses) into
    its top-level tokens.

    Tokens are separated by spaces at nesting depth zero; a parenthesised
    group is kept whole, including its own parentheses.

    Raises:
        ValueError: if the parentheses in *edge_str* are not balanced.
    """
    pieces: list[str] = []
    token_start = 0
    nesting = 0
    in_token = False

    for pos, char in enumerate(edge_str):
        if char == "(":
            # A group opening at depth zero begins a new token here.
            if nesting == 0:
                in_token = True
                token_start = pos
            nesting += 1
        elif char == ")":
            nesting -= 1
            if nesting < 0:
                raise ValueError(f"Unbalanced parenthesis in edge string: '{edge_str}'")
            if nesting == 0:
                # The group that opened at depth zero is now complete.
                pieces.append(edge_str[token_start : pos + 1])
                in_token = False
        elif char == " ":
            # Spaces only separate tokens outside of any group.
            if in_token and nesting == 0:
                pieces.append(edge_str[token_start:pos])
                in_token = False
        elif not in_token:
            in_token = True
            token_start = pos

    if in_token:
        if nesting > 0:
            raise ValueError(f"Unbalanced parenthesis in edge string: '{edge_str}'")
        pieces.append(edge_str[token_start:])

    return tuple(pieces)
64
+
65
+
66
def _hedge_from_str(source: str) -> Hyperedge:
    """Iteratively parse an edge string into a Hyperedge.

    Uses an explicit stack rather than recursion so that pathologically
    nested edge strings cannot exhaust Python's call stack. Each frame in
    the stack represents one open ``(...)`` group being assembled and
    holds: ``[parens_flag, tokens, next_token_index, children_built]``.

    Args:
        source: String representation of an edge; surrounding whitespace
            is stripped and newlines are treated as spaces.

    Returns:
        The ``Atom`` or ``Hyperedge`` described by *source*.

    Raises:
        ValueError: if *source* (or any nested group) is empty, or if its
            parentheses are unbalanced.
    """
    edge_str = source.strip().replace("\n", " ")
    parens = _edge_str_has_outer_parens(edge_str)
    # The helper only looks at the first character. Without this check a
    # string such as "(ab" would have its last character silently sliced
    # off below instead of being reported as malformed.
    if parens and not edge_str.endswith(")"):
        raise ValueError(f"Unbalanced parenthesis in edge string: '{edge_str}'")
    inner = edge_str[1:-1] if parens else edge_str

    tokens = split_edge_str(inner)
    if not tokens:
        raise ValueError(f"Edge string is empty: '{source}'")

    stack: list[list[Any]] = [[parens, tokens, 0, []]]
    final: Hyperedge | None = None

    while stack:
        frame = stack[-1]
        if frame[2] >= len(frame[1]):
            # All tokens for this frame consumed; build the edge.
            children: list[Hyperedge] = frame[3]
            frame_parens: bool = frame[0]
            if len(children) == 1 and isinstance(children[0], Atom):
                # A group holding a single atom collapses to that atom,
                # remembering whether it was written inside parentheses.
                built: Hyperedge = Atom(str(children[0]), frame_parens)
            elif children:
                built = Hyperedge(tuple(children))
            else:
                # Unreachable: empty token lists are rejected before push,
                # but keep the guard for defensiveness.
                raise ValueError(f"Edge string is empty: '{source}'")
            stack.pop()
            if stack:
                # Hand the finished group to its parent frame.
                stack[-1][3].append(built)
            else:
                final = built
            continue

        token = frame[1][frame[2]]
        frame[2] += 1
        if _edge_str_has_outer_parens(token):
            # Nested group: open a new frame instead of recursing.
            inner_tok = token[1:-1]
            sub_tokens = split_edge_str(inner_tok)
            if not sub_tokens:
                raise ValueError(f"Edge string is empty: '{token}'")
            stack.append([True, sub_tokens, 0, []])
        else:
            frame[3].append(Atom(token))

    assert final is not None  # loop guarantees this
    return final
119
+
120
+
121
def _collect_positions(tok_pos: Hyperedge) -> list[int]:
    """Gather every non-negative token position found in a tok_pos tree.

    Atoms are read as integers via their string form; negative values are
    skipped. Non-atomic nodes contribute the positions of their children,
    in order.
    """
    if not tok_pos.atom:
        return [
            position
            for child in tok_pos
            for position in _collect_positions(child)
        ]
    index = int(str(tok_pos))
    return [] if index < 0 else [index]
131
+
132
+
133
def _rebuild_with_text(
    edge: Hyperedge,
    tok_pos: Hyperedge,
    tokens: list[str],
) -> Hyperedge:
    """Return a copy of *edge* whose nodes carry text taken from *tokens*.

    *tok_pos* is walked in parallel with *edge*. An atom receives the
    token at its position (or no text when the position is negative). A
    non-atomic edge receives the space-joined tokens spanning the lowest
    to the highest position found beneath it, or no text when none of its
    positions are valid.
    """
    if not edge.atom:
        rebuilt_children = tuple(
            _rebuild_with_text(child, child_pos, tokens)
            for child, child_pos in zip(edge, tok_pos, strict=False)
        )
        covered = _collect_positions(tok_pos)
        if covered:
            first, last = min(covered), max(covered)
            span_text = " ".join(tokens[first : last + 1])
        else:
            span_text = None
        return Hyperedge(rebuilt_children, text=span_text)

    leaf = cast(Atom, edge)
    index = int(str(tok_pos))
    leaf_text = None if index < 0 else tokens[index]
    return Atom(str(leaf), leaf.parens, text=leaf_text)
157
+
158
+
159
def hedge(
    source: str | Hyperedge | list | tuple | ParseResult,
) -> Hyperedge:
    """Create a hyperedge from one of the supported source kinds.

    * ``ParseResult`` -- the parsed edge is rebuilt with per-node text and
      the result's own text is set on the root.
    * ``tuple`` / ``list`` -- every item is converted with ``hedge`` and
      the results are wrapped in a ``Hyperedge``.
    * ``str`` -- parsed as an edge string.
    * ``Hyperedge`` / ``Atom`` / ``UniqueAtom`` -- returned unchanged.

    Apart from ``ParseResult``, sources are matched on their exact type.

    Raises:
        TypeError: if *source* is of any other type.
    """
    if isinstance(source, ParseResult):
        rebuilt = _rebuild_with_text(source.edge, source.tok_pos, source.tokens)
        object.__setattr__(rebuilt, "text", source.text)
        return rebuilt

    kind = type(source)
    if kind is tuple or kind is list:
        members = cast(Iterable, source)
        return Hyperedge(tuple(hedge(member) for member in members))
    if kind is str:
        return _hedge_from_str(cast(str, source))
    if kind in {Hyperedge, Atom, UniqueAtom}:
        return source  # type: ignore
    raise TypeError(
        f"Cannot create hyperedge from {type(source).__name__}: {source!r}"
    )
179
+
180
+
181
def build_atom(text: str, *parts: str) -> Atom:
    """Build an atom from text and other parts.

    Args:
        text: Text converted with ``str_to_atom`` to form the start of the
            atom string.
        *parts: Further atom parts; empty ones are skipped and the rest
            are appended, each preceded by ``/``.

    Returns:
        An ``Atom`` built from the converted text alone when no non-empty
        parts are given, otherwise from the text followed by the parts.
    """
    # Start from the bare converted text so the result is defined even
    # when there are no parts to append.
    atom_str = str_to_atom(text)
    parts_str = "/".join(part for part in parts if part)
    if parts_str:
        atom_str = f"{atom_str}/{parts_str}"
    return Atom(atom_str)