parsanol 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +7 -0
  2. data/Cargo.lock +546 -0
  3. data/Cargo.toml +9 -0
  4. data/HISTORY.txt +12 -0
  5. data/LICENSE +23 -0
  6. data/README.adoc +487 -0
  7. data/Rakefile +135 -0
  8. data/ext/parsanol_native/Cargo.toml +34 -0
  9. data/ext/parsanol_native/extconf.rb +15 -0
  10. data/ext/parsanol_native/src/lib.rs +17 -0
  11. data/lib/parsanol/ast_visitor.rb +122 -0
  12. data/lib/parsanol/atoms/alternative.rb +122 -0
  13. data/lib/parsanol/atoms/base.rb +202 -0
  14. data/lib/parsanol/atoms/can_flatten.rb +194 -0
  15. data/lib/parsanol/atoms/capture.rb +38 -0
  16. data/lib/parsanol/atoms/context.rb +334 -0
  17. data/lib/parsanol/atoms/context_optimized.rb +38 -0
  18. data/lib/parsanol/atoms/custom.rb +110 -0
  19. data/lib/parsanol/atoms/cut.rb +66 -0
  20. data/lib/parsanol/atoms/dsl.rb +96 -0
  21. data/lib/parsanol/atoms/dynamic.rb +39 -0
  22. data/lib/parsanol/atoms/entity.rb +75 -0
  23. data/lib/parsanol/atoms/ignored.rb +37 -0
  24. data/lib/parsanol/atoms/infix.rb +162 -0
  25. data/lib/parsanol/atoms/lookahead.rb +82 -0
  26. data/lib/parsanol/atoms/named.rb +74 -0
  27. data/lib/parsanol/atoms/re.rb +83 -0
  28. data/lib/parsanol/atoms/repetition.rb +259 -0
  29. data/lib/parsanol/atoms/scope.rb +35 -0
  30. data/lib/parsanol/atoms/sequence.rb +194 -0
  31. data/lib/parsanol/atoms/str.rb +103 -0
  32. data/lib/parsanol/atoms/visitor.rb +91 -0
  33. data/lib/parsanol/atoms.rb +46 -0
  34. data/lib/parsanol/buffer.rb +133 -0
  35. data/lib/parsanol/builder_callbacks.rb +353 -0
  36. data/lib/parsanol/cause.rb +122 -0
  37. data/lib/parsanol/context.rb +39 -0
  38. data/lib/parsanol/convenience.rb +36 -0
  39. data/lib/parsanol/edit_tracker.rb +111 -0
  40. data/lib/parsanol/error_reporter/contextual.rb +99 -0
  41. data/lib/parsanol/error_reporter/deepest.rb +120 -0
  42. data/lib/parsanol/error_reporter/tree.rb +63 -0
  43. data/lib/parsanol/error_reporter.rb +100 -0
  44. data/lib/parsanol/expression/treetop.rb +154 -0
  45. data/lib/parsanol/expression.rb +106 -0
  46. data/lib/parsanol/fast_mode.rb +149 -0
  47. data/lib/parsanol/first_set.rb +79 -0
  48. data/lib/parsanol/grammar_builder.rb +177 -0
  49. data/lib/parsanol/incremental_parser.rb +177 -0
  50. data/lib/parsanol/interval_tree.rb +217 -0
  51. data/lib/parsanol/lazy_result.rb +179 -0
  52. data/lib/parsanol/lexer.rb +144 -0
  53. data/lib/parsanol/mermaid.rb +139 -0
  54. data/lib/parsanol/native/parser.rb +612 -0
  55. data/lib/parsanol/native/serializer.rb +248 -0
  56. data/lib/parsanol/native/transformer.rb +435 -0
  57. data/lib/parsanol/native/types.rb +42 -0
  58. data/lib/parsanol/native.rb +217 -0
  59. data/lib/parsanol/optimizer.rb +85 -0
  60. data/lib/parsanol/optimizers/choice_optimizer.rb +78 -0
  61. data/lib/parsanol/optimizers/cut_inserter.rb +179 -0
  62. data/lib/parsanol/optimizers/lookahead_optimizer.rb +50 -0
  63. data/lib/parsanol/optimizers/quantifier_optimizer.rb +60 -0
  64. data/lib/parsanol/optimizers/sequence_optimizer.rb +97 -0
  65. data/lib/parsanol/options/ruby_transform.rb +107 -0
  66. data/lib/parsanol/options/serialized.rb +94 -0
  67. data/lib/parsanol/options/zero_copy.rb +128 -0
  68. data/lib/parsanol/options.rb +20 -0
  69. data/lib/parsanol/parallel.rb +133 -0
  70. data/lib/parsanol/parser.rb +182 -0
  71. data/lib/parsanol/parslet.rb +151 -0
  72. data/lib/parsanol/pattern/binding.rb +91 -0
  73. data/lib/parsanol/pattern.rb +159 -0
  74. data/lib/parsanol/pool.rb +219 -0
  75. data/lib/parsanol/pools/array_pool.rb +75 -0
  76. data/lib/parsanol/pools/buffer_pool.rb +175 -0
  77. data/lib/parsanol/pools/position_pool.rb +92 -0
  78. data/lib/parsanol/pools/slice_pool.rb +64 -0
  79. data/lib/parsanol/position.rb +94 -0
  80. data/lib/parsanol/resettable.rb +29 -0
  81. data/lib/parsanol/result.rb +46 -0
  82. data/lib/parsanol/result_builder.rb +208 -0
  83. data/lib/parsanol/result_stream.rb +261 -0
  84. data/lib/parsanol/rig/rspec.rb +71 -0
  85. data/lib/parsanol/rope.rb +81 -0
  86. data/lib/parsanol/scope.rb +104 -0
  87. data/lib/parsanol/slice.rb +146 -0
  88. data/lib/parsanol/source/line_cache.rb +109 -0
  89. data/lib/parsanol/source.rb +180 -0
  90. data/lib/parsanol/source_location.rb +167 -0
  91. data/lib/parsanol/streaming_parser.rb +124 -0
  92. data/lib/parsanol/string_view.rb +195 -0
  93. data/lib/parsanol/transform.rb +226 -0
  94. data/lib/parsanol/version.rb +5 -0
  95. data/lib/parsanol/wasm/README.md +80 -0
  96. data/lib/parsanol/wasm/package.json +51 -0
  97. data/lib/parsanol/wasm/parsanol.js +252 -0
  98. data/lib/parsanol/wasm/parslet.d.ts +129 -0
  99. data/lib/parsanol/wasm_parser.rb +240 -0
  100. data/lib/parsanol.rb +280 -0
  101. data/parsanol-ruby.gemspec +67 -0
  102. metadata +293 -0
@@ -0,0 +1,80 @@
1
+ # @parsanol/wasm
2
+
3
+ High-performance PEG parser using WebAssembly. Designed for use with Opal (Ruby in JavaScript) and general JavaScript applications.
4
+
5
+ ## Features
6
+
7
+ - **18-44x faster** than the pure Ruby parser
8
+ - **99.5% fewer allocations**
9
+ - Works in browsers and Node.js
10
+ - Full TypeScript support
11
+ - Compatible with Opal
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ npm install @parsanol/wasm
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ### Browser/ESM
22
+
23
+ ```html
24
+ <script type="module">
25
+ import { initParsanol, ParsanolParser } from '@parsanol/wasm';
26
+
27
+ // Initialize WASM (call once)
28
+ await initParsanol();
29
+
30
+ // Create parser from grammar JSON
31
+ const grammar = {
32
+ atoms: [
33
+ { Str: { pattern: "hello" } }
34
+ ],
35
+ root: 0
36
+ };
37
+
38
+ const parser = new ParsanolParser(grammar);
39
+
40
+ // Parse input
41
+ const result = parser.parse('hello');
42
+ console.log(result); // "hello"
43
+ </script>
44
+ ```
45
+
46
+ ### Node.js
47
+
48
+ ```javascript
49
+ const { initParsanol, ParsanolParser } = require('@parsanol/wasm');
50
+
51
+ async function main() {
52
+ await initParsanol();
53
+
54
+ const parser = new ParsanolParser(grammarJson);
55
+ const result = parser.parse('input text');
56
+ console.log(result);
57
+ }
58
+
59
+ main();
60
+ ```
61
+
62
+ ### Opal (Ruby in Browser)
63
+
64
+ ```ruby
65
+ # First initialize WASM in JavaScript:
66
+ # Parsanol::WasmParser.init.then { puts "ready" }
67
+
68
+ require 'parsanol/wasm_parser'
69
+
70
+ grammar_json = {
71
+ atoms: [
72
+ { Str: { pattern: "hello" } }
73
+ ],
74
+ root: 0
75
+ }.to_json
76
+
77
+ parser = Parsanol::WasmParser.new(grammar_json)
78
+ result = parser.parse('hello')
79
+ puts result # => "hello"
80
+ ```
@@ -0,0 +1,51 @@
1
+ {
2
+ "name": "@parsanol/wasm",
3
+ "version": "0.1.0",
4
+ "description": "High-performance PEG parser using WebAssembly for Opal and JavaScript",
5
+ "main": "parsanol.js",
6
+ "module": "parsanol.js",
7
+ "types": "parslet.d.ts",
8
+ "browser": "parsanol.js",
9
+ "files": [
10
+ "parsanol.js",
11
+ "parslet.d.ts",
12
+ "parsanol_native.js",
13
+ "parsanol_native_bg.js",
14
+ "parsanol_native_bg.wasm",
15
+ "parsanol_native.d.ts"
16
+ ],
17
+ "scripts": {
18
+ "build": "cd ../../ext/parsanol_native && wasm-pack build --features wasm --target web --out-dir ../../lib/parsanol/wasm",
19
+ "build:node": "cd ../../ext/parsanol_native && wasm-pack build --features wasm --target nodejs --out-dir ../../lib/parsanol/wasm-node",
20
+ "test": "node test/parsanol.test.js",
21
+ "prepublishOnly": "npm run build"
22
+ },
23
+ "keywords": [
24
+ "parser",
25
+ "peg",
26
+ "parsing-expression-grammar",
27
+ "wasm",
28
+ "webassembly",
29
+ "opal",
30
+ "ruby",
31
+ "performance"
32
+ ],
33
+ "author": "Ribose Inc. <open.source@ribose.com>",
34
+ "license": "MIT",
35
+ "repository": {
36
+ "type": "git",
37
+ "url": "https://github.com/parsanol/parsanol-ruby.git",
38
+ "directory": "lib/parsanol/wasm"
39
+ },
40
+ "bugs": {
41
+ "url": "https://github.com/parsanol/parsanol-ruby/issues"
42
+ },
43
+ "homepage": "https://github.com/parsanol/parsanol-ruby#readme",
44
+ "engines": {
45
+ "node": ">=14.0.0"
46
+ },
47
+ "sideEffects": [
48
+ "parsanol_native.js",
49
+ "parsanol_native_bg.js"
50
+ ]
51
+ }
@@ -0,0 +1,252 @@
1
+ /**
2
+ * Parsanol WASM Parser
3
+ *
4
+ * High-performance parser using WebAssembly for use in browsers and Node.js.
5
+ * Compatible with Opal (Ruby in JavaScript) for parsing in the browser.
6
+ *
7
+ * @example
8
+ * // Browser/ESM
9
+ * import { createParser } from '@parsanol/wasm';
10
+ * const parser = await createParser(grammarJson);
11
+ * const result = parser.parse('input text');
12
+ *
13
+ * @example
14
+ * // Node.js
15
+ * const { createParser } = require('@parsanol/wasm');
16
+ * const parser = await createParser(grammarJson); // inside an async function
17
+ * const result = parser.parse('input text');
18
+ *
19
+ * @example
20
+ * // Opal
21
+ * %x{
22
+ * var parser = new ParsanolNative.WasmParser(#{grammar_json});
23
+ * var result = parser.parse(#{input});
24
+ * return result;
25
+ * }
26
+ */
27
+
28
+ import init, { WasmParser } from './parsanol_native.js';
29
+
30
+ let initialized = false;
31
+ let initPromise = null;
32
+
33
/**
 * Initialize the WASM module.
 *
 * Must be awaited before constructing a ParsanolParser directly: the
 * constructor throws while the module is uninitialized. createParser()
 * awaits this function on the caller's behalf.
 *
 * Concurrent callers share a single in-flight initialization. If that
 * initialization fails, the cached promise is cleared so a later call can
 * retry instead of receiving the same rejection forever.
 *
 * @returns {Promise<void>} Resolves once the WASM module is ready
 */
export async function initParsanol() {
  if (initialized) return;

  if (initPromise === null) {
    initPromise = init().then(
      () => {
        initialized = true;
      },
      (error) => {
        // Forget the failed attempt so the next call starts a fresh init().
        initPromise = null;
        throw error;
      }
    );
  }

  return initPromise;
}
49
+
50
/**
 * Report whether the WASM module has finished initializing.
 *
 * @returns {boolean} Current value of the module-level `initialized` flag
 */
export function isInitialized() {
  const ready = initialized;
  return ready;
}
58
+
59
/**
 * Parser backed by the WebAssembly WasmParser.
 *
 * The WASM module must already be initialized when an instance is created.
 *
 * Usage:
 *   await initParsanol();
 *   const parser = new ParsanolParser(grammarJson);
 *   console.log(parser.parse('input'));
 */
export class ParsanolParser {
  // Underlying wasm-bindgen parser instance.
  #engine = null;
  // Grammar in its serialized (JSON string) form, as handed to the engine.
  #grammarText = null;

  /**
   * Build a parser for the given grammar.
   *
   * @param {string|object} grammar - Grammar as a JSON string, or an object
   *   that is serialized with JSON.stringify
   * @throws {Error} If WASM not initialized or grammar is invalid
   */
  constructor(grammar) {
    if (!initialized) {
      throw new Error('Parsanol WASM not initialized. Call initParsanol() first.');
    }

    if (typeof grammar === 'string') {
      this.#grammarText = grammar;
    } else {
      this.#grammarText = JSON.stringify(grammar);
    }

    this.#engine = new WasmParser(this.#grammarText);
  }

  /**
   * Parse the input and return the result of the engine's parse().
   *
   * @param {string} input - Input string to parse
   * @returns {object} Parsed AST as JavaScript object
   * @throws {Error} If parsing fails
   */
  parse(input) {
    const ast = this.#engine.parse(input);
    return ast;
  }

  /**
   * Parse the input and return the result of the engine's parse_flat().
   * The flat form can be turned into a value with decodeFlatArray().
   *
   * @param {string} input - Input string to parse
   * @returns {BigUint64Array} Flat array with tagged values
   * @throws {Error} If parsing fails
   */
  parseFlat(input) {
    const flat = this.#engine.parse_flat(input);
    return flat;
  }

  /**
   * Parse the input and return the result of the engine's parse_json().
   *
   * @param {string} input - Input string to parse
   * @returns {string} JSON string of parsed AST
   * @throws {Error} If parsing fails
   */
  parseJson(input) {
    const json = this.#engine.parse_json(input);
    return json;
  }

  /**
   * Kept for API compatibility; intentionally does nothing.
   */
  reset() {
    // Parser state is reset automatically on each parse
  }
}
128
+
129
/**
 * Decode the flat array produced by parseFlat() into a JavaScript value.
 *
 * Tag format (every tag and operand occupies one 64-bit word):
 * - 0x00: nil
 * - 0x01: bool (followed by 0 or 1)
 * - 0x02: int (followed by value, decoded as a signed 64-bit integer)
 * - 0x03: float (followed by bits)
 * - 0x04: string (followed by offset, length)
 * - 0x05: array start
 * - 0x06: array end
 * - 0x07: hash start
 * - 0x08: hash end
 * - 0x09: hash key (followed by the key's byte length, one placeholder
 *         word, then the key bytes packed eight per word, lowest byte first)
 *
 * Integers are returned as Number, so magnitudes above 2^53 lose precision.
 *
 * NOTE(review): string offset/length are applied with
 * String.prototype.substring, i.e. as UTF-16 code-unit indices. If the
 * producer emits byte offsets, non-ASCII input will be sliced incorrectly;
 * confirm against the encoder.
 *
 * @param {BigUint64Array} flat - Flat array from parseFlat()
 * @param {string} input - Original input string for string references
 * @returns {any} Decoded JavaScript value
 * @throws {Error} If the array is truncated or contains an unknown tag
 */
export function decodeFlatArray(flat, input) {
  const TAG_NIL = 0x00n;
  const TAG_BOOL = 0x01n;
  const TAG_INT = 0x02n;
  const TAG_FLOAT = 0x03n;
  const TAG_STRING = 0x04n;
  const TAG_ARRAY_START = 0x05n;
  const TAG_ARRAY_END = 0x06n;
  const TAG_HASH_START = 0x07n;
  const TAG_HASH_END = 0x08n;

  // Key bytes are decoded as UTF-8; ignoreBOM keeps a leading U+FEFF intact.
  const utf8 = new TextDecoder('utf-8', { ignoreBOM: true });

  // Shared scratch buffer for reinterpreting 64 raw bits as a double.
  const floatBits = new BigUint64Array(1);
  const floatView = new Float64Array(floatBits.buffer);

  let pos = 0;

  // Fail loudly on truncated input instead of decoding `undefined`.
  function ensureAvailable() {
    if (pos >= flat.length) {
      throw new Error(`Unexpected end of flat array at position ${pos}`);
    }
  }

  // Consume and return the next word.
  function next() {
    ensureAvailable();
    return flat[pos++];
  }

  // True when the next word (not consumed) equals the given tag.
  function atTag(tag) {
    ensureAvailable();
    return flat[pos] === tag;
  }

  // Read one hash-key record and return the key as a string.
  function readKey() {
    next(); // TAG_HASH_KEY marker
    const byteLength = Number(next());
    next(); // placeholder word

    const wordCount = Math.ceil(byteLength / 8);
    if (wordCount > flat.length - pos) {
      throw new Error(`Unexpected end of flat array at position ${pos}`);
    }

    const bytes = new Uint8Array(byteLength);
    for (let i = 0; i < byteLength; i++) {
      const word = flat[pos + (i >> 3)];
      bytes[i] = Number((word >> BigInt((i & 7) * 8)) & 0xffn);
    }
    pos += wordCount;

    return utf8.decode(bytes);
  }

  function decode() {
    const tag = next();

    switch (tag) {
      case TAG_NIL:
        return null;

      case TAG_BOOL:
        return next() !== 0n;

      case TAG_INT:
        // The word holds the integer's 64 raw bits; reinterpret as signed so
        // negative values do not surface as huge positive numbers.
        return Number(BigInt.asIntN(64, next()));

      case TAG_FLOAT:
        floatBits[0] = next();
        return floatView[0];

      case TAG_STRING: {
        const offset = Number(next());
        const length = Number(next());
        return input.substring(offset, offset + length);
      }

      case TAG_ARRAY_START: {
        const items = [];
        while (!atTag(TAG_ARRAY_END)) {
          items.push(decode());
        }
        pos++; // Skip ARRAY_END
        return items;
      }

      case TAG_HASH_START: {
        const obj = {};
        while (!atTag(TAG_HASH_END)) {
          const key = readKey();
          const value = decode();
          // defineProperty (not assignment) so a "__proto__" key becomes an
          // ordinary own property instead of replacing the prototype.
          Object.defineProperty(obj, key, {
            value,
            writable: true,
            enumerable: true,
            configurable: true,
          });
        }
        pos++; // Skip HASH_END
        return obj;
      }

      default:
        throw new Error(`Unknown tag: ${tag}`);
    }
  }

  return decode();
}
232
+
233
/**
 * Initialize the WASM module if needed, then construct a parser.
 *
 * @param {string|object} grammar - Grammar JSON string or object
 * @returns {Promise<ParsanolParser>} Parser created after initParsanol() resolved
 */
export async function createParser(grammar) {
  const ready = initParsanol();
  await ready;

  const parser = new ParsanolParser(grammar);
  return parser;
}
244
+
245
// Default export: every named export gathered on a single object
const api = {
  initParsanol: initParsanol,
  isInitialized: isInitialized,
  ParsanolParser: ParsanolParser,
  decodeFlatArray: decodeFlatArray,
  createParser: createParser,
};

export default api;
@@ -0,0 +1,129 @@
1
+ /**
2
+ * Type definitions for Parsanol WASM Parser
3
+ *
4
+ * @packageDocumentation
5
+ */
6
+
7
/**
 * Initialize the WASM module.
 *
 * Must resolve before a parser is constructed with `new`.
 *
 * Declared as `initParsanol` because that is the name the JavaScript
 * wrapper exports; the previous declaration (`initParslet`) named a
 * function that does not exist at runtime.
 *
 * @returns Promise that resolves once the module is ready
 */
export declare function initParsanol(): Promise<void>;
12
+
13
/**
 * Report whether the WASM module has been initialized.
 *
 * @returns `true` once initialization has completed
 */
export declare function isInitialized(): boolean;
17
+
18
/**
 * Grammar handed to the parser.
 *
 * A plain JSON-serializable object: a list of atoms plus the index of the
 * atom parsing starts from.
 */
export interface Grammar {
  /** Atom definitions */
  atoms: Array<Atom>;

  /** Index of the root atom */
  root: number;
}
29
+
30
/**
 * One grammar atom.
 *
 * Each variant is an object with a single key naming the atom kind, except
 * `'Cut'`, which is a bare string. Numeric `atom` / `atoms` fields
 * presumably index into `Grammar.atoms` (TODO confirm against the encoder).
 */
export type Atom =
  // Pattern-based atoms
  | { Str: { pattern: string } }
  | { Re: { pattern: string } }
  // Atoms combining several other atoms
  | { Sequence: { atoms: Array<number> } }
  | { Alternative: { atoms: Array<number> } }
  // Atoms wrapping a single other atom
  | { Repetition: { atom: number; min: number; max: number | null } }
  | { Named: { name: string; atom: number } }
  | { Entity: { atom: number } }
  | { Lookahead: { atom: number; positive: boolean } }
  // Bare-string atom
  | 'Cut';
43
+
44
/**
 * Value produced by a parse: a JSON-like tree of scalars, arrays and
 * string-keyed objects.
 */
export type ParseResult =
  // Scalars
  | null
  | boolean
  | number
  | string
  // Containers (recursive)
  | Array<ParseResult>
  | { [key: string]: ParseResult };
54
+
55
/**
 * High-performance parser using WebAssembly.
 *
 * Declared as `ParsanolParser` because that is the class the JavaScript
 * wrapper exports; the previous declaration (`ParsletParser`) named a class
 * that does not exist at runtime.
 */
export declare class ParsanolParser {
  /**
   * Create a new parser instance.
   *
   * @param grammar - Grammar JSON string or object
   * @throws {Error} If WASM not initialized or grammar is invalid
   */
  constructor(grammar: string | Grammar);

  /**
   * Parse input string and return AST.
   *
   * @param input - Input string to parse
   * @returns Parsed AST as JavaScript object
   * @throws {Error} If parsing fails
   */
  parse(input: string): ParseResult;

  /**
   * Parse input and return flat array format.
   *
   * @param input - Input string to parse
   * @returns Flat array with tagged values
   * @throws {Error} If parsing fails
   */
  parseFlat(input: string): BigUint64Array;

  /**
   * Parse input and return JSON string.
   *
   * @param input - Input string to parse
   * @returns JSON string of parsed AST
   * @throws {Error} If parsing fails
   */
  parseJson(input: string): string;

  /**
   * Reset parser state for reuse.
   *
   * The JavaScript implementation is a no-op: state is reset automatically
   * on each parse.
   */
  reset(): void;
}

/**
 * Decode flat array format to JavaScript object.
 *
 * @param flat - Flat array from parseFlat()
 * @param input - Original input string for string references
 * @returns Decoded JavaScript value
 */
export declare function decodeFlatArray(flat: BigUint64Array, input: string): ParseResult;

/**
 * Create a parser with automatic initialization.
 *
 * @param grammar - Grammar JSON string or object
 * @returns Promise resolving to initialized parser
 */
export declare function createParser(grammar: string | Grammar): Promise<ParsanolParser>;
111
+
112
/**
 * Low-level parser class generated by wasm-bindgen.
 *
 * NOTE(review): the JavaScript wrapper in this package imports this class
 * from `./parsanol_native.js` but does not re-export it, so importing it
 * from the package entry point presumably yields `undefined`; confirm
 * before relying on this declaration.
 *
 * @internal
 */
export declare class WasmParser {
  /** @param grammarJson - Grammar serialized as a JSON string */
  constructor(grammarJson: string);

  /** Parse `input` and return the result as a JavaScript value. */
  parse(input: string): any;

  /** Parse `input` and return the tagged flat-array encoding. */
  parse_flat(input: string): BigUint64Array;

  /** Parse `input` and return the result as a JSON string. */
  parse_json(input: string): string;
}
123
+
124
/**
 * WASM module initialization function.
 *
 * NOTE(review): the JavaScript wrapper's default export is an object holding
 * its named exports, not this function; this declaration appears to describe
 * the default export of `./parsanol_native.js` instead. Confirm which module
 * this file is meant to type.
 *
 * @internal
 */
declare function init(): Promise<void>;

export default init;