hyperbase 0.9.0__tar.gz → 0.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hyperbase-0.9.0 → hyperbase-0.10.0}/CHANGELOG.md +84 -1
- {hyperbase-0.9.0 → hyperbase-0.10.0}/PKG-INFO +1 -1
- hyperbase-0.10.0/VERSION +1 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/installation.md +3 -3
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/parsers.md +23 -43
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/readers.md +1 -1
- {hyperbase-0.9.0 → hyperbase-0.10.0}/pyproject.toml +1 -1
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/__init__.py +1 -1
- hyperbase-0.10.0/src/hyperbase/builders.py +187 -0
- hyperbase-0.10.0/src/hyperbase/cli/__init__.py +202 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/cli/read.py +26 -14
- hyperbase-0.10.0/src/hyperbase/cli/repl.py +819 -0
- hyperbase-0.10.0/src/hyperbase/constants.py +105 -0
- hyperbase-0.10.0/src/hyperbase/correctness.py +150 -0
- hyperbase-0.10.0/src/hyperbase/hyperedge.py +634 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/loaders.py +5 -2
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/parsers/__init__.py +12 -4
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/parsers/parse_result.py +14 -2
- hyperbase-0.10.0/src/hyperbase/parsers/parser.py +175 -0
- hyperbase-0.10.0/src/hyperbase/parsers/repl_api.py +63 -0
- hyperbase-0.10.0/src/hyperbase/parsers/utils.py +44 -0
- hyperbase-0.10.0/src/hyperbase/patterns/__init__.py +10 -0
- hyperbase-0.10.0/src/hyperbase/patterns/checks.py +71 -0
- hyperbase-0.10.0/src/hyperbase/patterns/combine.py +353 -0
- hyperbase-0.10.0/src/hyperbase/patterns/matcher.py +643 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/readers/reader.py +4 -4
- hyperbase-0.10.0/src/hyperbase/transforms.py +155 -0
- hyperbase-0.10.0/tests/test_correctness_errors.py +275 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/tests/test_hyperedge.py +3 -2
- {hyperbase-0.9.0 → hyperbase-0.10.0}/tests/test_hyperedge_text.py +1 -14
- {hyperbase-0.9.0 → hyperbase-0.10.0}/tests/test_load_edges.py +7 -8
- hyperbase-0.10.0/tests/test_malformed_input.py +178 -0
- hyperbase-0.10.0/tests/test_parser_plugin.py +196 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/tests/test_patterns.py +179 -87
- hyperbase-0.10.0/tests/test_performance.py +149 -0
- hyperbase-0.10.0/tests/test_reader_plugin.py +266 -0
- hyperbase-0.9.0/VERSION +0 -1
- hyperbase-0.9.0/src/hyperbase/cli/__init__.py +0 -161
- hyperbase-0.9.0/src/hyperbase/cli/repl.py +0 -854
- hyperbase-0.9.0/src/hyperbase/constants.py +0 -4
- hyperbase-0.9.0/src/hyperbase/hyperedge.py +0 -1120
- hyperbase-0.9.0/src/hyperbase/parsers/correctness.py +0 -326
- hyperbase-0.9.0/src/hyperbase/parsers/parser.py +0 -88
- hyperbase-0.9.0/src/hyperbase/parsers/utils.py +0 -19
- hyperbase-0.9.0/src/hyperbase/patterns/__init__.py +0 -95
- hyperbase-0.9.0/src/hyperbase/patterns/argroles.py +0 -142
- hyperbase-0.9.0/src/hyperbase/patterns/atoms.py +0 -98
- hyperbase-0.9.0/src/hyperbase/patterns/common.py +0 -181
- hyperbase-0.9.0/src/hyperbase/patterns/matcher.py +0 -235
- hyperbase-0.9.0/src/hyperbase/patterns/merge.py +0 -58
- hyperbase-0.9.0/src/hyperbase/patterns/properties.py +0 -61
- hyperbase-0.9.0/src/hyperbase/patterns/utils.py +0 -118
- hyperbase-0.9.0/src/hyperbase/patterns/variables.py +0 -152
- hyperbase-0.9.0/tests/test_correctness.py +0 -360
- {hyperbase-0.9.0 → hyperbase-0.10.0}/.github/workflows/docs.yml +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/.github/workflows/publish.yml +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/.gitignore +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/.pre-commit-config.yaml +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/AUTHORS +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/LICENSE +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/README.md +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/atom-structure.png +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/authors/camille.jpg +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/authors/chih-chun.jpg +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/authors/kexinren.jpg +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/authors/max.jpg +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/authors/telmo.jpg +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/authors/yael-stein.jpg +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/block.png +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/favicon.png +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/graphbrain.png +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/hyper-vs-graph.png +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/hyperedge-blocks.png +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/logo_hyperquest_small_nav.svg +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/one-pagers/GB-A.pdf +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/one-pagers/GB-A_thumb.png +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/one-pagers/GB-C.pdf +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/one-pagers/GB-C_thumb.png +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/show.png +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/socsemics.png +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/images/vblock.png +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/assets/stylesheets/extra.css +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/authors.md +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/index.md +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/api.md +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/discovering-patterns.md +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/hyperedges.md +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/notation.md +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/overview.md +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/manual/patterns.md +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/pubs-cases.md +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/tutorials/parsing-a-sentence.md +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/docs/tutorials/playing-with-hyperedges.md +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/mkdocs.yml +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/cli/parsers.py +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/data/__init__.py +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/data/wikipedia/discard_sections.txt +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/patterns/counter.py +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/readers/__init__.py +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/readers/txt.py +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/readers/url.py +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/src/hyperbase/readers/wikipedia.py +0 -0
- {hyperbase-0.9.0 → hyperbase-0.10.0}/tests/__init__.py +0 -0
|
@@ -1,8 +1,39 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.10.0] - 11-04-2026
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- `[]` pattern notation for specifying sequences of arguments.
|
|
8
|
+
- EdgeType and ArgRole enums.
|
|
9
|
+
- safety cap for match (`_MAX_ARGROLE_ITEMS=10`) against pathological edge arities.
|
|
10
|
+
- caching of computed `Hyperedge`/`Atom` properties.
|
|
11
|
+
- `parse_to_jsonl` method on `Parser`.
|
|
12
|
+
- unified parameter interface for parsers.
|
|
13
|
+
- method `Parser.accepted_params`.
|
|
14
|
+
- maximum depth protection for parsers.
|
|
15
|
+
- repl api for parsers.
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
|
|
19
|
+
- multiple patterns functions are now `Hyperedge`/`Atom` methods: `is_wildcard`, `is_pattern`, `is_fun_pattern`, `is_variable`, `contains_variable`, `variable_name`.
|
|
20
|
+
- `hyperedge.py` now delegates to smaller modules with well-defined concerns: `builders.py`, `correctness.py`, `transforms.py`, `patterns/checks.py` and `patterns/matcher.py`.
|
|
21
|
+
- replaced `itertools.permutations` with constraint-propagated backtracking in argrole matcher.
|
|
22
|
+
- `parse_text` renamed to `parse`; old iterator-based `parse` removed.
|
|
23
|
+
- `read_source` renamed to `parse_source`; `read_source_to_jsonl` renamed to `parse_source_to_jsonl`.
|
|
24
|
+
- renamed `sentensize` to `get_sentences`.
|
|
25
|
+
- hedge now uses an explicit stack instead of recursion (so that pathologically
|
|
26
|
+
nested edge strings cannot exhaust Python's call stack).
|
|
27
|
+
- renamed parsers.correctness to parsers.badness.
|
|
28
|
+
|
|
29
|
+
### Removed
|
|
30
|
+
|
|
31
|
+
- `__add__` operator overloading in `Hyperedge`/`Atom`.
|
|
32
|
+
|
|
3
33
|
## [0.9.0] - 05-04-2026
|
|
4
34
|
|
|
5
35
|
### Added
|
|
36
|
+
|
|
6
37
|
- readers (txt, url, wikipedia).
|
|
7
38
|
- cli interface with repl, parsers, readers.
|
|
8
39
|
- hyperedge.Hyperedge.match function (calls parsers.match_pattern).
|
|
@@ -11,6 +42,7 @@
|
|
|
11
42
|
- load_edges function.
|
|
12
43
|
|
|
13
44
|
### Changed
|
|
45
|
+
|
|
14
46
|
- added get_parser to main functions (at hyperbase root).
|
|
15
47
|
- improved documentation.
|
|
16
48
|
- hedge now accepts ParseResults and can recursively add Hyperedge.text strings.
|
|
@@ -25,23 +57,27 @@
|
|
|
25
57
|
- Renamed Hyperedge.normalized to normalise.
|
|
26
58
|
|
|
27
59
|
### Removed
|
|
60
|
+
|
|
28
61
|
- function patterns.edge_matches_pattern.
|
|
29
62
|
- deprecated and obsolete methods from Hyperedge: is_atom, to_str, roots, insert_first_argument, connect, sequence, contains_atom_type, main_concepts, replace_main_concept, has_argroles.
|
|
30
63
|
|
|
31
64
|
## [0.8.0] - 26-03-2026 - hyperbase is the successor of graphbrain
|
|
32
65
|
|
|
33
66
|
### Added
|
|
67
|
+
|
|
34
68
|
- parser plugin foundation.
|
|
35
69
|
- more comprehensive Hyperedge.check_correctness.
|
|
36
70
|
- check parse correctness.
|
|
37
71
|
- type checking: full code coverage.
|
|
38
72
|
|
|
39
73
|
### Changed
|
|
74
|
+
|
|
40
75
|
- renamed library to hyperbase.
|
|
41
76
|
- trimmed down library to the essentials: hyperedge, patterns and parser foundations.
|
|
42
77
|
- converted documentation to Material for MkDocs.
|
|
43
78
|
|
|
44
79
|
### Removed
|
|
80
|
+
|
|
45
81
|
- hypergraph module, hypergraph database (memory module).
|
|
46
82
|
- alphabeta parser implementation.
|
|
47
83
|
- old scripts, examples, processors.
|
|
@@ -51,7 +87,9 @@
|
|
|
51
87
|
- obsolete constants.
|
|
52
88
|
|
|
53
89
|
## [0.7.0] - 05-03-2026
|
|
90
|
+
|
|
54
91
|
### Added
|
|
92
|
+
|
|
55
93
|
- patterns.is_wildcard().
|
|
56
94
|
- Base class hypergraph.memory.keyvalue.KeyValue for key-value hypergraph databases, removing redundant code between LevelDB and SQLite.
|
|
57
95
|
- Tests for LevelDB (only the SQLite Hypergraph implementation was being directly tested).
|
|
@@ -63,6 +101,7 @@
|
|
|
63
101
|
- Hypergraph.get_attributes().
|
|
64
102
|
|
|
65
103
|
### Changed
|
|
104
|
+
|
|
66
105
|
- Entire project is now in pure Python
|
|
67
106
|
- Python >=3.10 now required.
|
|
68
107
|
- Hypergraph.search(), .match() and .count() now working with functional patterns and argument role matching.
|
|
@@ -72,19 +111,25 @@
|
|
|
72
111
|
- Matches from patterns with repeated variables are collected in lists.
|
|
73
112
|
|
|
74
113
|
### Removed
|
|
114
|
+
|
|
75
115
|
- graphbrain.logic obsolete module.
|
|
76
116
|
- LevelDB backend
|
|
77
117
|
|
|
78
118
|
## [0.6.1] - 31-10-2022
|
|
119
|
+
|
|
79
120
|
### Changed
|
|
121
|
+
|
|
80
122
|
- Hyperedge.replace_argroles(), .insert_argrole() and .add_argument() now work with functional patterns such as var.
|
|
81
123
|
- Fixed bug when matching patterns containing atoms functional pattern where no atom has argroles.
|
|
82
124
|
|
|
83
125
|
### Removed
|
|
126
|
+
|
|
84
127
|
- interactive_case_generator() from graphbrain.notebook.
|
|
85
128
|
|
|
86
129
|
## [0.6.0] - 27-10-2022
|
|
130
|
+
|
|
87
131
|
### Added
|
|
132
|
+
|
|
88
133
|
- Hyperedge.atom and .not_atom properties.
|
|
89
134
|
- Hyperedge.mtype() and .connector_mtype() methods.
|
|
90
135
|
- Hyperedge.t, .mt, .ct and .cmt type shortcut properties.
|
|
@@ -99,6 +144,7 @@
|
|
|
99
144
|
- Processor class.
|
|
100
145
|
|
|
101
146
|
### Changed
|
|
147
|
+
|
|
102
148
|
- Coreference resolution now using the new spaCy experimental model.
|
|
103
149
|
- Now using spaCy transformer GPU models by default, can fallback to CPU model.
|
|
104
150
|
- Hyperedge.is_atom() deprecated.
|
|
@@ -112,6 +158,7 @@
|
|
|
112
158
|
- Hyperedge.argroles() now also works at relation/concept level.
|
|
113
159
|
|
|
114
160
|
### Removed
|
|
161
|
+
|
|
115
162
|
- graphbrain.patterns.normalize_edge().
|
|
116
163
|
- graphbrain.stats obsolete package.
|
|
117
164
|
- graphbrain.cognition obsolete package.
|
|
@@ -119,18 +166,22 @@
|
|
|
119
166
|
- Hyperedge .predicate() and .predicate_atom().
|
|
120
167
|
|
|
121
168
|
## [0.5.0] - 28-07-2021
|
|
169
|
+
|
|
122
170
|
### Added
|
|
171
|
+
|
|
123
172
|
- SQLite3 hypergraph database backend.
|
|
124
173
|
- Hypergraph.add_with_attributes().
|
|
125
174
|
- import and export commands.
|
|
126
175
|
- Hypergraph context manager for batch writes (with hopen(hg_locator) as hg ...).
|
|
127
176
|
|
|
128
177
|
### Changed
|
|
178
|
+
|
|
129
179
|
- Main hypergraph database backend is now SQLite3.
|
|
130
180
|
- LevelDB backend becomes optional. (disabled by default)
|
|
131
181
|
- Neuralcoref becomes optional. (disabled by default)
|
|
132
182
|
|
|
133
183
|
### Removed
|
|
184
|
+
|
|
134
185
|
- Hypergraph.atom_count().
|
|
135
186
|
- Hypergraph.edge_count().
|
|
136
187
|
- Hypergraph.primary_atom_count().
|
|
@@ -139,21 +190,29 @@
|
|
|
139
190
|
- corefs_unidecode agent.
|
|
140
191
|
|
|
141
192
|
## [0.4.3] - 22-04-2021
|
|
193
|
+
|
|
142
194
|
### Changed
|
|
195
|
+
|
|
143
196
|
- Fixed AlphaBeta bug related to temporary atoms being removed too soon from atom2tokens.
|
|
144
197
|
- Hypergraph.add_sequence() converts sequence name directly to atom.
|
|
145
198
|
- Parser level coreference resolution (neuralcoref) disabled by default, requires dedicated build.
|
|
146
199
|
|
|
147
200
|
## [0.4.2] - 12-04-2021
|
|
201
|
+
|
|
148
202
|
### Changed
|
|
203
|
+
|
|
149
204
|
- Solving wheel compilation issue.
|
|
150
205
|
|
|
151
206
|
## [0.4.1] - 07-04-2021
|
|
207
|
+
|
|
152
208
|
### Changed
|
|
209
|
+
|
|
153
210
|
- Solving issue with inclusion of auxiliary data file in non-binary distributions.
|
|
154
211
|
|
|
155
212
|
## [0.4.0] - 07-04-2021
|
|
213
|
+
|
|
156
214
|
### Added
|
|
215
|
+
|
|
157
216
|
- Agents system.
|
|
158
217
|
- Conjunctions resolution agent.
|
|
159
218
|
- Number agent (singular/plural relations) and related meaning.number module.
|
|
@@ -178,6 +237,7 @@
|
|
|
178
237
|
- Utility functions to show colored edges in the terminal.
|
|
179
238
|
|
|
180
239
|
### Changed
|
|
240
|
+
|
|
181
241
|
- Special characters in atoms are now percent-encoded.
|
|
182
242
|
- parse() now returns a dictionary that includes inferred edges.
|
|
183
243
|
- parse() now returns a dictionary of edges to text.
|
|
@@ -193,25 +253,32 @@
|
|
|
193
253
|
- Hyperedge.replace_atom() optional unique argument.
|
|
194
254
|
|
|
195
255
|
### Removed
|
|
256
|
+
|
|
196
257
|
- Meta-modifier hyperedge type.
|
|
197
258
|
- Auxiliary, subpredicate and dependency hyperedge types.
|
|
198
259
|
- Obsolete Hyperedge.nest() method.
|
|
199
260
|
|
|
200
261
|
## [0.3.2] - 10-02-2020
|
|
262
|
+
|
|
201
263
|
### Added
|
|
264
|
+
|
|
202
265
|
- simplify_role() on Atom objects produces an atom with only its simple type as role.
|
|
203
266
|
|
|
204
267
|
### Changed
|
|
268
|
+
|
|
205
269
|
- Lemmas are now based on atoms with simplified roles.
|
|
206
270
|
- Improved actors agent (more accurate identification of actors, English only for now).
|
|
207
271
|
|
|
208
272
|
## [0.3.1] - 03-02-2020
|
|
273
|
+
|
|
209
274
|
### Added
|
|
275
|
+
|
|
210
276
|
- German parser (experimental and incomplete).
|
|
211
277
|
- Documentation.
|
|
212
278
|
- Hyperedge sequences.
|
|
213
279
|
|
|
214
280
|
### Changed
|
|
281
|
+
|
|
215
282
|
- Improved hyperedge visualization in notebooks.
|
|
216
283
|
- Agents receive language and sequence.
|
|
217
284
|
- txt_parser agent creates a sequence.
|
|
@@ -220,11 +287,14 @@
|
|
|
220
287
|
- Improved conflict agent.
|
|
221
288
|
|
|
222
289
|
## [0.3.0] - 28-09-2019
|
|
290
|
+
|
|
223
291
|
### Added
|
|
292
|
+
|
|
224
293
|
- Tests.
|
|
225
294
|
- Documentation.
|
|
226
295
|
|
|
227
296
|
### Changed
|
|
297
|
+
|
|
228
298
|
- Graphbrain is now beta (main APIs considered stable).
|
|
229
299
|
- LevelDB edge attributes encoded in JSON.
|
|
230
300
|
- Renamed hypergraph() to hgraph() and moved function to __init__.
|
|
@@ -237,23 +307,29 @@
|
|
|
237
307
|
- Improved notebooks visualizations (show(), blocks(), vblocks()).
|
|
238
308
|
|
|
239
309
|
### Removed
|
|
310
|
+
|
|
240
311
|
- graphbrain.funs module.
|
|
241
312
|
|
|
242
313
|
## [0.2.2] - 13-09-2019
|
|
243
314
|
|
|
244
315
|
### Added
|
|
316
|
+
|
|
245
317
|
- txt_parser agent.
|
|
246
318
|
- MANIFEST.in to include VERSION file in distribution.
|
|
247
319
|
|
|
248
320
|
### Changed
|
|
321
|
+
|
|
249
322
|
- Fixing 'pip install graphbrain' on Linux/Windows.
|
|
250
323
|
|
|
251
324
|
## [0.2.1] - 04-09-2019
|
|
325
|
+
|
|
252
326
|
### Added
|
|
327
|
+
|
|
253
328
|
- claim_actors and corefs_dets agents.
|
|
254
329
|
- meaning.concepts module.
|
|
255
330
|
|
|
256
331
|
### Changed
|
|
332
|
+
|
|
257
333
|
- Fixed example.
|
|
258
334
|
- hypergraph.sum_degree() and .sum_deep_degree().
|
|
259
335
|
- Parser improvements.
|
|
@@ -261,11 +337,14 @@
|
|
|
261
337
|
- Improved docs.
|
|
262
338
|
|
|
263
339
|
### Removed
|
|
340
|
+
|
|
264
341
|
- Obsolete 'work-in-progress' code.
|
|
265
342
|
- hg2json command.
|
|
266
343
|
|
|
267
344
|
## [0.2.0] - 04-08-2019
|
|
345
|
+
|
|
268
346
|
### Added
|
|
347
|
+
|
|
269
348
|
- Primary entities and deep degrees.
|
|
270
349
|
- Hyperedges have their own class, deriving from tuple.
|
|
271
350
|
- Atoms have a special class, deriving from Hyperedge.
|
|
@@ -273,11 +352,15 @@
|
|
|
273
352
|
- Created agent system + first agents.
|
|
274
353
|
|
|
275
354
|
### Changed
|
|
355
|
+
|
|
276
356
|
- Parsers now have own package.
|
|
277
357
|
|
|
278
358
|
### Removed
|
|
359
|
+
|
|
279
360
|
- Old experimental code.
|
|
280
361
|
|
|
281
362
|
## [0.1.0] - 14-06-2019
|
|
363
|
+
|
|
282
364
|
### Added
|
|
283
|
-
|
|
365
|
+
|
|
366
|
+
- First release.
|
hyperbase-0.10.0/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.10.0
|
|
@@ -124,11 +124,11 @@ uv run hyperbase parsers
|
|
|
124
124
|
Once installed, parsers can be used from the interactive REPL:
|
|
125
125
|
|
|
126
126
|
```bash
|
|
127
|
-
hyperbase repl --parser alphabeta --
|
|
127
|
+
hyperbase repl --parser alphabeta --lang en
|
|
128
128
|
```
|
|
129
129
|
|
|
130
130
|
```bash
|
|
131
|
-
uv run hyperbase repl --parser alphabeta --
|
|
131
|
+
uv run hyperbase repl --parser alphabeta --lang en
|
|
132
132
|
```
|
|
133
133
|
|
|
134
134
|
Or programmatically:
|
|
@@ -136,6 +136,6 @@ Or programmatically:
|
|
|
136
136
|
```python
|
|
137
137
|
from hyperbase.parsers import get_parser
|
|
138
138
|
|
|
139
|
-
parser = get_parser("alphabeta",
|
|
139
|
+
parser = get_parser("alphabeta", lang="en")
|
|
140
140
|
result = parser.parse("The sky is blue.")
|
|
141
141
|
```
|
|
@@ -20,10 +20,10 @@ Parsers are obtained by name with `get_parser()`:
|
|
|
20
20
|
```python
|
|
21
21
|
from hyperbase import get_parser
|
|
22
22
|
|
|
23
|
-
parser = get_parser("alphabeta",
|
|
23
|
+
parser = get_parser("alphabeta", lang="en")
|
|
24
24
|
```
|
|
25
25
|
|
|
26
|
-
The keyword arguments are forwarded to the parser constructor. Each parser plugin defines its own parameters -- for example, `alphabeta` takes a `
|
|
26
|
+
The keyword arguments are forwarded to the parser constructor. Each parser plugin defines its own parameters -- for example, `alphabeta` takes a `lang` code, while `generative` accepts `model_path`, `device`, `max_length`, and others. Run `hyperbase repl --parser <name> --help` (or `hyperbase read --parser <name> --help`) to see the full set of CLI flags injected by the active plugin.
|
|
27
27
|
|
|
28
28
|
To see which parsers are installed:
|
|
29
29
|
|
|
@@ -125,42 +125,7 @@ This is what `read_source_to_jsonl()` uses internally -- each line in the output
|
|
|
125
125
|
|
|
126
126
|
## Quality checking
|
|
127
127
|
|
|
128
|
-
The `
|
|
129
|
-
|
|
130
|
-
### Badness check
|
|
131
|
-
|
|
132
|
-
`badness_check()` runs a comprehensive quality check on a parsed edge, combining structural validation with token-to-atom matching:
|
|
133
|
-
|
|
134
|
-
```python
|
|
135
|
-
from hyperbase.parsers.correctness import badness_check
|
|
136
|
-
|
|
137
|
-
errors = badness_check(result.edge, result.tokens)
|
|
138
|
-
if errors:
|
|
139
|
-
for key, error_list in errors.items():
|
|
140
|
-
for code, message, severity in error_list:
|
|
141
|
-
print(f"[{code}] {message} (severity: {severity})")
|
|
142
|
-
else:
|
|
143
|
-
print("No errors found.")
|
|
144
|
-
```
|
|
145
|
-
|
|
146
|
-
The function returns a dictionary mapping edge fragments (or the string `'token-matching'`) to lists of `(code, message, severity)` tuples. An empty dictionary means no errors were found.
|
|
147
|
-
|
|
148
|
-
The checks include:
|
|
149
|
-
|
|
150
|
-
- **Structural correctness** -- validates the hyperedge against the SH specification (via `Hyperedge.check_correctness()`).
|
|
151
|
-
- **Argument role validation** -- checks that argument roles are drawn from the valid set (`m`, `s`, `p`, `a`, `o`, `i`, `x`, `t`, `j`, `r`, `c`) and that roles like `s`, `p`, `o` are not duplicated.
|
|
152
|
-
- **Junction consistency** -- verifies that junction arguments are consistently typed (all relations or all concepts).
|
|
153
|
-
- **Token matching** -- ensures that every token in the original sentence maps to an atom root in the edge, and vice versa. Handles multi-token atoms, contractions and other non-trivial correspondences.
|
|
154
|
-
|
|
155
|
-
### Structural quality only
|
|
156
|
-
|
|
157
|
-
For a lighter check that skips token matching:
|
|
158
|
-
|
|
159
|
-
```python
|
|
160
|
-
from hyperbase.parsers.correctness import check_structural_quality
|
|
161
|
-
|
|
162
|
-
errors = check_structural_quality(result.edge)
|
|
163
|
-
```
|
|
128
|
+
Badness/correctness checking lives in the parser plugin that needs it. The generative parser ships [`hyperbase_parser_gen.correctness.badness_check`](https://github.com/telmomenezes/hyperbase-parser-gen) for combined structural + token-matching validation; see that package's docs for usage.
|
|
164
129
|
|
|
165
130
|
## CLI
|
|
166
131
|
|
|
@@ -177,7 +142,7 @@ Shows all installed parser plugins and their entry point values.
|
|
|
177
142
|
The REPL lets you parse sentences interactively:
|
|
178
143
|
|
|
179
144
|
```bash
|
|
180
|
-
hyperbase repl --parser alphabeta --
|
|
145
|
+
hyperbase repl --parser alphabeta --lang en
|
|
181
146
|
```
|
|
182
147
|
|
|
183
148
|
Inside the REPL, type a sentence to parse it. Use `/help` to see available commands, `/settings` to view current configuration, and `/set` to change settings on the fly (e.g. `/set parser generative`). The REPL caches parser instances, so switching between parsers is fast after the first load.
|
|
@@ -186,7 +151,7 @@ Inside the REPL, type a sentence to parse it. Use `/help` to see available comma
|
|
|
186
151
|
|
|
187
152
|
```bash
|
|
188
153
|
# Parse a file to JSONL
|
|
189
|
-
hyperbase read article.txt -o output.jsonl --parser alphabeta --
|
|
154
|
+
hyperbase read article.txt -o output.jsonl --parser alphabeta --lang en
|
|
190
155
|
|
|
191
156
|
# Parse a Wikipedia article
|
|
192
157
|
hyperbase read https://en.wikipedia.org/wiki/Hypergraph -o output.jsonl
|
|
@@ -196,10 +161,12 @@ See the [readers](readers.md) documentation for the full set of `hyperbase read`
|
|
|
196
161
|
|
|
197
162
|
## Custom parsers
|
|
198
163
|
|
|
199
|
-
To create a custom parser, subclass `Parser` and implement
|
|
164
|
+
To create a custom parser, subclass `Parser` and implement:
|
|
200
165
|
|
|
201
|
-
- `
|
|
166
|
+
- `__init__(params)` -- constructor accepting a dictionary of parser parameters.
|
|
167
|
+
- `get_sentences(text)` -- split a text string into a list of sentences.
|
|
202
168
|
- `parse_sentence(sentence)` -- parse a single sentence and return a list of `ParseResult` objects.
|
|
169
|
+
- `accepted_params()` (classmethod) -- return a dict describing the parameters the parser accepts.
|
|
203
170
|
|
|
204
171
|
Optionally, override `parse_batch(sentences)` if your parser can process multiple sentences more efficiently in a single call.
|
|
205
172
|
|
|
@@ -208,7 +175,20 @@ from hyperbase.parsers import Parser, ParseResult
|
|
|
208
175
|
from hyperbase.hyperedge import hedge
|
|
209
176
|
|
|
210
177
|
class MyParser(Parser):
|
|
211
|
-
|
|
178
|
+
@classmethod
|
|
179
|
+
def accepted_params(cls):
|
|
180
|
+
return {
|
|
181
|
+
"lang": {
|
|
182
|
+
"type": str, "default": None,
|
|
183
|
+
"description": "Language code.", "required": True,
|
|
184
|
+
},
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
def __init__(self, params=None):
|
|
188
|
+
super().__init__(params)
|
|
189
|
+
self.lang = self.params["lang"]
|
|
190
|
+
|
|
191
|
+
def get_sentences(self, text):
|
|
212
192
|
# simple sentence splitting
|
|
213
193
|
return [s.strip() for s in text.split('.') if s.strip()]
|
|
214
194
|
|
|
@@ -70,7 +70,7 @@ hyperbase read article.txt -o output.txt
|
|
|
70
70
|
hyperbase read https://en.wikipedia.org/wiki/Hypergraph -o output.jsonl
|
|
71
71
|
|
|
72
72
|
# Specify reader and parser explicitly
|
|
73
|
-
hyperbase read source.txt -o output.jsonl --reader plain_text --parser alphabeta --
|
|
73
|
+
hyperbase read source.txt -o output.jsonl --reader plain_text --parser alphabeta --lang en
|
|
74
74
|
```
|
|
75
75
|
|
|
76
76
|
## Built-in readers
|
|
@@ -74,7 +74,7 @@ target-version = "py310"
|
|
|
74
74
|
select = ["E", "F", "W", "I", "UP", "B", "SIM", "RUF", "Q", "C4", "PT", "N", "ANN"]
|
|
75
75
|
|
|
76
76
|
[tool.ruff.lint.per-file-ignores]
|
|
77
|
-
"tests/*" = ["
|
|
77
|
+
"tests/*" = ["ANN001", "ANN003", "ANN201", "ANN202", "ANN204", "ANN205", "D100", "D101", "D102", "D400", "D415", "E501", "N802", "PT011"]
|
|
78
78
|
|
|
79
79
|
[tool.ruff.lint.flake8-quotes]
|
|
80
80
|
inline-quotes = "double"
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
from typing import Any, cast
|
|
5
|
+
|
|
6
|
+
from hyperbase.constants import ATOM_ENCODE_TABLE
|
|
7
|
+
from hyperbase.hyperedge import Atom, Hyperedge, UniqueAtom
|
|
8
|
+
from hyperbase.parsers.parse_result import ParseResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def str_to_atom(s: str) -> str:
    """Turn an arbitrary string into text usable as an atom.

    The input is lower-cased and the result is then passed through
    ``str.translate`` using ``ATOM_ENCODE_TABLE``.
    """
    lowered = s.lower()
    return lowered.translate(ATOM_ENCODE_TABLE)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _edge_str_has_outer_parens(edge_str: str) -> bool:
|
|
17
|
+
"""Check if string representation of edge is delimited by outer
|
|
18
|
+
parenthesis.
|
|
19
|
+
"""
|
|
20
|
+
if len(edge_str) < 2:
|
|
21
|
+
return False
|
|
22
|
+
return edge_str[0] == "("
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def split_edge_str(edge_str: str) -> tuple[str, ...]:
    """Shallow split into tokens of a string representation of an edge,
    without outer parenthesis.

    Only the top nesting level is split: a parenthesised group is returned
    as a single token (including its parentheses), and anything else is
    split on spaces. Runs of spaces produce no empty tokens.

    Args:
        edge_str: Edge text with its outer parentheses already removed.

    Returns:
        The top-level tokens, in order of appearance. Empty for an empty
        or all-space string.

    Raises:
        ValueError: If a ")" has no matching "(", or a "(" is never closed.
    """
    tokens: list[str] = []
    start = 0
    depth = 0
    active = False  # True while a token that began at `start` is open

    for i, c in enumerate(edge_str):
        if c == " ":
            # Spaces only delimit at the top level; inside a group they
            # belong to the group's token.
            if active and depth == 0:
                tokens.append(edge_str[start:i])
                active = False
        elif c == "(":
            if depth == 0:
                # A group opening right after atom text (e.g. "a(b c)")
                # ends that atom; flush it rather than overwrite `start`
                # and lose it. This mirrors how ")" already ends a token
                # without needing a following space.
                if active:
                    tokens.append(edge_str[start:i])
                active = True
                start = i
            depth += 1
        elif c == ")":
            depth -= 1
            if depth < 0:
                raise ValueError(f"Unbalanced parenthesis in edge string: '{edge_str}'")
            if depth == 0:
                tokens.append(edge_str[start : i + 1])
                active = False
        elif not active:
            active = True
            start = i

    if active:
        # A token still open at end of input is either a trailing atom or
        # an unterminated group.
        if depth > 0:
            raise ValueError(f"Unbalanced parenthesis in edge string: '{edge_str}'")
        tokens.append(edge_str[start:])

    return tuple(tokens)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _hedge_from_str(source: str) -> Hyperedge:
    """Build a Hyperedge from its string form without recursing.

    Nesting is tracked on an explicit stack instead of the Python call
    stack, so a very deeply nested edge string cannot trigger a recursion
    error. Every stack entry describes one group under construction as
    ``[parens_flag, tokens, next_token_index, children_built]``.

    Raises:
        ValueError: If the string, or any parenthesised group inside it,
            contains no tokens (unbalanced parentheses are reported by
            ``split_edge_str``).
    """
    normalized = source.strip().replace("\n", " ")
    outer_parens = _edge_str_has_outer_parens(normalized)
    body = normalized[1:-1] if outer_parens else normalized

    top_tokens = split_edge_str(body)
    if not top_tokens:
        raise ValueError(f"Edge string is empty: '{source}'")

    stack: list[list[Any]] = [[outer_parens, top_tokens, 0, []]]
    result: Hyperedge | None = None

    while stack:
        frame = stack[-1]
        has_parens, frame_tokens, cursor, built_children = frame

        if cursor < len(frame_tokens):
            # Consume the next token of the group on top of the stack.
            tok = frame_tokens[cursor]
            frame[2] = cursor + 1
            if not _edge_str_has_outer_parens(tok):
                built_children.append(Atom(tok))
                continue
            # Nested group: open a new frame; it is appended to this
            # frame's children once it has been fully assembled.
            nested_tokens = split_edge_str(tok[1:-1])
            if not nested_tokens:
                raise ValueError(f"Edge string is empty: '{tok}'")
            stack.append([True, nested_tokens, 0, []])
            continue

        # Every token of this frame has been consumed: assemble the edge.
        node: Hyperedge
        if len(built_children) == 1 and isinstance(built_children[0], Atom):
            # A group holding a single atom stays an atom and carries the
            # frame's parens flag.
            node = Atom(str(built_children[0]), has_parens)
        elif built_children:
            node = Hyperedge(tuple(built_children))
        else:
            # Defensive only: frames are never pushed with an empty token
            # list.
            raise ValueError(f"Edge string is empty: '{source}'")

        stack.pop()
        if stack:
            stack[-1][3].append(node)
        else:
            result = node

    assert result is not None  # the root frame always yields a result
    return result
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _collect_positions(tok_pos: Hyperedge) -> list[int]:
    """Gather every non-negative token position found in a tok_pos tree.

    Atoms are read as integers via ``int(str(...))``; negative values are
    left out. Non-atomic nodes contribute the positions of their children,
    in iteration order.
    """
    if not tok_pos.atom:
        return [
            position
            for child in tok_pos
            for position in _collect_positions(child)
        ]

    index = int(str(tok_pos))
    if index < 0:
        return []
    return [index]
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _rebuild_with_text(
    edge: Hyperedge,
    tok_pos: Hyperedge,
    tokens: list[str],
) -> Hyperedge:
    """Return a copy of ``edge`` whose nodes carry text taken from ``tokens``.

    ``edge`` and ``tok_pos`` are walked in parallel. An atom receives the
    token at its position, or no text when the position is negative. A
    non-atomic edge receives the space-joined tokens spanning the smallest
    to the largest position found beneath it, or no text when there are no
    valid positions.
    """
    if not edge.atom:
        rebuilt_children = tuple(
            _rebuild_with_text(child, child_pos, tokens)
            for child, child_pos in zip(edge, tok_pos, strict=False)
        )
        span = _collect_positions(tok_pos)
        span_text: str | None = None
        if span:
            first, last = min(span), max(span)
            span_text = " ".join(tokens[first : last + 1])
        return Hyperedge(rebuilt_children, text=span_text)

    leaf = cast(Atom, edge)
    index = int(str(tok_pos))
    leaf_text = None if index < 0 else tokens[index]
    return Atom(str(leaf), leaf.parens, text=leaf_text)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def hedge(
    source: str | Hyperedge | list | tuple | ParseResult,
) -> Hyperedge:
    """Create a hyperedge from one of several source representations.

    - A ``ParseResult`` is rebuilt with per-node text from its tokens and
      token positions; the root's ``text`` is then set to the result's
      own ``text``.
    - A ``str`` is parsed as an edge string.
    - A ``tuple`` or ``list`` becomes a Hyperedge whose items are each
      converted by ``hedge``.
    - A ``Hyperedge``, ``Atom`` or ``UniqueAtom`` is returned unchanged.

    Apart from ``ParseResult``, the checks compare the exact type of
    ``source``, so subclasses of these types are not matched.

    Raises:
        TypeError: If ``source`` is none of the supported kinds.
    """
    if isinstance(source, ParseResult):
        parsed = source
        rebuilt = _rebuild_with_text(parsed.edge, parsed.tok_pos, parsed.tokens)
        # Set the root text after construction, through object.__setattr__.
        object.__setattr__(rebuilt, "text", parsed.text)
        return rebuilt

    if type(source) is str:
        return _hedge_from_str(source)

    if type(source) in (tuple, list):
        items = cast(Iterable, source)
        return Hyperedge(tuple(hedge(item) for item in items))

    if type(source) in (Hyperedge, Atom, UniqueAtom):
        return source  # type: ignore

    raise TypeError(
        f"Cannot create hyperedge from {type(source).__name__}: {source!r}"
    )
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def build_atom(text: str, *parts: str) -> Atom:
    """Build an atom from text and other parts.

    ``text`` is converted with ``str_to_atom``. The non-empty ``parts``
    are joined with "/" and appended after a "/" separator. When there are
    no non-empty parts, the atom consists of the converted text alone.

    Args:
        text: Text to convert into the first segment of the atom.
        *parts: Further atom segments; empty strings are skipped.

    Returns:
        The resulting Atom.
    """
    # Start from the converted text so the name is always bound, including
    # when every part is empty or no parts are given.
    atom_str = str_to_atom(text)
    parts_str = "/".join(part for part in parts if part)
    if parts_str:
        atom_str = "".join((atom_str, "/", parts_str))
    return Atom(atom_str)
|