npm - @ohm-js/wasm - Versions diffs - 0.1.0 - Mend

@ohm-js/wasm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/.mise.toml +2 -0
package/AGENT.md +25 -0
package/LICENSE +21 -0
package/Makefile +23 -0
package/README.md +34 -0
package/TODO.md +28 -0
package/package.json +32 -0
package/runtime/ohmRuntime.ts +252 -0
package/scripts/bundlewasm.ts +49 -0
package/scripts/modparse.ts +397 -0
package/src/cli.js +36 -0
package/src/index.js +1195 -0
package/test/data/_book-review.liquid +257 -0
package/test/data/_es5.js +1057 -0
package/test/data/_es5.wasm +0 -0
package/test/data/_html5shiv-3.7.3.js +326 -0
package/test/data/_liquid-html.ohm +605 -0
package/test/go/README.md +67 -0
package/test/go/cst.go +164 -0
package/test/go/go.mod +5 -0
package/test/go/go.sum +2 -0
package/test/go/matcher.go +370 -0
package/test/go/testmain.go +161 -0
package/test/test-es5.js +104 -0
package/test/test-liquid-html.js +27 -0
package/test/test-wasm.js +764 -0

package/.mise.toml ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [tools]
2	+ node = "24"

package/AGENT.md ADDED Viewed

@@ -0,0 +1,25 @@
+# Guide to @ohm-js/wasm
+This is the code for the @ohm-js/wasm package. Its purpose is to convert Ohm grammars to WebAssembly, and also to support using the resulting Wasm modules from JavaScript.
+There is also code for using the Wasm modules from Go.
+## Repository structure
+- `src`: Source of the @ohm-js/wasm NPM package.
+- `test`: Tests
+- `runtime`: AssemblyScript code, which is used to write the runtime support library for the JavaScript code.
+- `test/go`: Go code and tests for using the generated grammars from Go.
+## Build and test commands
+- `pnpm test` to run the JavaScript tests.
+- `make go-test-es5` to run the Go tests.
+- `make` to rebuild the runtime support library after changing the AssemblyScript code.
+## Code style guidelines
+- Strive to write the smallest amount of code that will solve the problem as posed. Do not add things "just in case".
+- Do not include JSDoc in the code that you write.
+- Use comments sparingly. Only leave comments where the meaning is not clear to an informed reader of the code.
+- When the Go and JS code handle similar things, the Go code should follow the patterns of the JavaScript code, but it should also look like idiomatic Go. It's better for the Go code to have a slightly different interface than to be very un-idiomatic.

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2014-2022 Alessandro Warth and the Ohm project contributors.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/Makefile ADDED Viewed

@@ -0,0 +1,23 @@
+# Default target: regenerate the bundled runtime sections module.
+.PHONY: all
+all: build/ohmRuntime.wasm_sections.ts
+# bundlewasm.ts writes its output next to its input (<input>_sections.ts), so the
+# target must live in build/; otherwise make can never see it as up to date.
+build/ohmRuntime.wasm_sections.ts: scripts/bundlewasm.ts scripts/modparse.ts build/ohmRuntime.wasm
+	node scripts/bundlewasm.ts build/ohmRuntime.wasm
+# --memoryBase 67239936 = 2 Wasm pages (131072) + the memo table (1024 * 65536 bytes),
+# matching MEMO_START_OFFSET, MEMO_COL_SIZE_BYTES and MAX_INPUT_LEN_BYTES in runtime/ohmRuntime.ts.
+build/ohmRuntime.wasm: runtime/ohmRuntime.ts
+	npx asc --runtime stub -o build/ohmRuntime.wasm --memoryBase 67239936 runtime/ohmRuntime.ts
+# Compile the runtime without -o (no output file is requested).
+.PHONY: print-runtime
+print-runtime:
+	npx asc --runtime stub --memoryBase 67239936 runtime/ohmRuntime.ts
+# Run the Go test driver against the prebuilt ES5 grammar with a full-size input file.
+.PHONY: go-test-es5
+go-test-es5: test/go/testmain
+	cd test/go && ./testmain -wasm ../data/_es5.wasm -file ../data/_html5shiv-3.7.3.js
+# Same as go-test-es5, but with a tiny inline input.
+.PHONY: go-test-es5-lite
+go-test-es5-lite: test/go/testmain
+	cd test/go && ./testmain -wasm ../data/_es5.wasm -input "var x = 3; function foo() {}"
+# Rebuild the Go driver when any of its Go sources change.
+test/go/testmain: test/go/testmain.go test/go/matcher.go test/go/cst.go
+	cd test/go && go mod tidy && go build -o testmain

package/README.md ADDED Viewed

@@ -0,0 +1,34 @@
+# @ohm-js/wasm
+Compile Ohm.js grammars to WebAssembly, so they can be used from other languages.
+To use the grammar, use the appropriate _miniohm_ package for your language.
+**NOTE:** This package is experimental; the API is not yet stable.
+## Prerequisites
+This package requires Node 24.
+## Usage (compiling to Wasm)
+### Command line
+```
+npx ohm2wasm my-grammar.ohm
+```
+This will write a Wasm grammar blob to ./my-grammar.wasm.
+### API
+```
+import * as ohm from 'ohm-js';
+import {Compiler} from '@ohm-js/wasm';
+// Instantiate your own grammar…
+const g = ohm.grammar('MyGrammar { start = "blah" }');
+// compile() returns the Wasm grammar blob as a Uint8Array.
+const bytes = new Compiler(g).compile();
+```

package/TODO.md ADDED Viewed

@@ -0,0 +1,28 @@
+## TODOs
+- [x] Include a map of rule name to ruleId in the module.
+- [ ] Implicit space skipping
+- [ ] Error handling
+- [x] NonterminalNodes should keep track of the rule
+- [ ] When iteration contains a sequence, the children are flattened into the iter node.
+- [x] Basic parameterized rules
+- [ ] Parameterized rules with >3 params
+- [ ] Parameters that aren't terminals
+- [ ] Memoization for parameterized rules
+- [x] Support direct left recursion.
+- [ ] Handle left recursion detection at grammar parse time.
+- [ ] Separate API for _creating_ the Wasm module from the WasmMatcher interface.
+- [ ] Implement a proper CLI.
+## Limitations
+- The input is assumed to be no bigger than 64k.
+- For the memo table, we assume that there are no more than 256 rules in the grammar.
+- Parameterized rules only support up to 3 parameters, and no memoization.
+  - Parameters must be terminals.
+## Unanswered questions
+- How to deal with matchLength in lookahead. In regular Ohm, lookahead _does_ bind things. But that is hard to square with the current CST representation, that stores only the matchLength. Because somehow the things inside a lookahead must consume nothing — but if you have `&("a" "b")`, the only way to make them consume nothing (in the current representation) is to rewrite the matchLength of the two terminal nodes.
+  - Could we introduce a pseudo-node for lookahead? It could get transparently unpacked when walking the tree.
+- Memoization of parameterized rules: Alex suggested assigning memoization keys statically to unique applications

package/package.json ADDED Viewed

@@ -0,0 +1,32 @@
+{
+  "name": "@ohm-js/wasm",
+  "version": "0.1.0",
+  "description": "Compile Ohm.js grammars to WebAssembly",
+  "main": "src/index.js",
+  "bin": {
+    "ohm2wasm": "src/cli.js"
+  },
+  "type": "module",
+  "keywords": [],
+  "author": "Patrick Dubroy <pdubroy@gmail.com>",
+  "license": "MIT",
+  "devDependencies": {
+    "@thi.ng/leb128": "^3.1.48",
+    "assemblyscript": "^0.27.36",
+    "ava": "^6.2.0",
+    "liquid-html-parser": "link:@shopify/liquid-html-parser",
+    "wabt": "1.0.37-nightly.20250428"
+  },
+  "peerDependencies": {
+    "ohm-js": "^17.1.0"
+  },
+  "dependencies": {
+    "@shopify/liquid-html-parser": "^2.8.2",
+    "@wasmgroundup/emit": "^1.0.2"
+  },
+  "scripts": {
+    "build-and-test": "make && ava test",
+    "check-node-version": "node -e \"process.exit(parseInt(process.versions.node.split('.')[0]) < 24 ? 1 : 0)\"",
+    "test": "pnpm run check-node-version && pnpm run build-and-test || echo 'Skipping tests: Node 24 required'"
+  }
+}

package/runtime/ohmRuntime.ts ADDED Viewed

@@ -0,0 +1,252 @@
+type Result = i32; // Memo/apply result: 0 = no entry, negative = failure marker, otherwise treated as a CST node pointer.
+declare function fillInputBuffer(offset: i32, maxLen: i32): i32; // Declared without a body; match() compares its return value with the final `pos`.
+declare function printI32(val: i32): void; // Declared without a body; not called in the code visible here.
+// TODO: Find a way to share these.
+@inline const WASM_PAGE_SIZE: usize = 64 * 1024; // 64 KiB
+@inline const MEMO_START_OFFSET: usize = 2 * WASM_PAGE_SIZE; // The memo table begins at byte 131072.
+@inline const MEMO_COL_SIZE_BYTES: usize = 4 * 256; // One memo column per input position: 256 entries of 4 bytes each.
+@inline const STACK_START_OFFSET: usize = WASM_PAGE_SIZE; // Initial value of `sp`, set in match().
+@inline const MAX_INPUT_LEN_BYTES: usize = 64 * 1024; // Number of memo columns cleared by match().
+// Note: the rule evaluation functions use a different representation.
+// They return non-zero for success and zero for failure.
+@inline const EMPTY: Result = 0; // Same value as a cleared (never written) memo entry.
+@inline const FAIL: Result = 0xfffffff0; // isFailure() relies on this being negative when read as an i32.
+@inline const UNUSED_LR_BOMB: Result = FAIL | 0x1; // Written by evalApply0 before evaluating a rule body.
+@inline const USED_LR_BOMB: Result = FAIL | 0x3; // Written by useMemoizedResult when an UNUSED_LR_BOMB entry is read.
+@inline const CST_NODE_OVERHEAD: usize = 12; // Node header: count (offset 0), matchLength (offset 4), type (offset 8).
+@inline const NODE_TYPE_ITERATION: i32 = -2; // Type tag for iteration nodes; terminal nodes use -1, nonterminals use their ruleId.
+// Shared globals
+let pos: i32 = 0; // Current input position.
+let sp: usize = 0; // Reset to STACK_START_OFFSET by match(); not otherwise used in the code visible here.
+let bindings: Array<i32> = new Array<i32>(); // Stack of CST node pointers produced so far.
+// Reads the memo entry for `ruleId` in the column belonging to input position `memoPos`.
+@inline function memoTableGet(memoPos: usize, ruleId: i32): Result {
+  const entryOffset: usize = memoPos * MEMO_COL_SIZE_BYTES + ruleId * sizeof<Result>();
+  return load<Result>(entryOffset, MEMO_START_OFFSET);
+}
+// Writes `value` as the memo entry for `ruleId` in the column belonging to input position `memoPos`.
+@inline function memoTableSet(memoPos: usize, ruleId: i32, value: Result): void {
+  const entryOffset: usize = memoPos * MEMO_COL_SIZE_BYTES + ruleId * sizeof<Result>();
+  store<Result>(entryOffset, value, MEMO_START_OFFSET);
+}
+@inline function cstGetCount(ptr: usize): i32 { // Reads the child count from the node header (offset 0).
+  return load<i32>(ptr, 0);
+}
+@inline function cstSetCount(ptr: usize, count: i32): void { // Writes the child count into the node header (offset 0).
+  store<i32>(ptr, count, 0);
+}
+@inline function cstGetMatchLength(ptr: usize): i32 { // Reads the node's match length from the header (offset 4).
+  return load<i32>(ptr, 4);
+}
+@inline function cstSetMatchLength(ptr: usize, len: i32): void { // Writes the node's match length into the header (offset 4).
+  store<i32>(ptr, len, 4);
+}
+@inline function cstGetType(ptr: usize): i32 { // Reads the node's type tag from the header (offset 8).
+  return load<i32>(ptr, 8);
+}
+@inline function cstSetType(ptr: usize, t: i32): void { // Writes the node's type tag into the header (offset 8).
+  store<i32>(ptr, t, 8);
+}
+@inline function memoizeResult(memoPos: usize, ruleId: i32, result: Result): void { // Thin alias for memoTableSet.
+  memoTableSet(memoPos, ruleId, result);
+}
+@inline function isFailure(result: Result): bool { // True for any negative result, which covers FAIL and both LR bomb markers.
+  return result < 0;
+}
+// Consumes a non-empty memo entry for `ruleId` at the current position.
+// - An unused LR bomb is flipped to "used" and reported as a failure (0).
+// - Any other failure marker is reported as a failure (0).
+// - Otherwise `result` is a node: advance `pos` past it, push it as a binding, and return it.
+function useMemoizedResult(ruleId: i32, result: Result): Result {
+  if (result === UNUSED_LR_BOMB) {
+    memoTableSet(pos, ruleId, USED_LR_BOMB);
+    return 0;
+  }
+  if (isFailure(result)) {
+    return 0;
+  }
+  const node = result;
+  bindings.push(node);
+  pos += cstGetMatchLength(node);
+  return node;
+}
+function hasMemoizedResult(ruleId: i32): boolean { // True when the memo table holds any non-zero entry for `ruleId` at `pos`.
+  return memoTableGet(pos, ruleId) !== 0;
+}
+// Entry point: matches the whole input against the rule `startRuleId`.
+// Returns non-zero only if the start rule succeeds AND consumes the entire input.
+export function match(startRuleId: i32): Result {
+  // Start from a clean slate: position, stack pointer, bindings and memo table.
+  pos = 0;
+  sp = STACK_START_OFFSET;
+  bindings = new Array<i32>();
+  memory.fill(MEMO_START_OFFSET, 0, MEMO_COL_SIZE_BYTES * MAX_INPUT_LEN_BYTES);
+  // Fetch the input (written at offset 0), then apply the start rule.
+  const inputLen = fillInputBuffer(0, i32(WASM_PAGE_SIZE));
+  const succeeded = evalApply0(startRuleId) !== 0;
+  // A match that leaves input unconsumed does not count.
+  if (inputLen !== pos) {
+    return 0;
+  }
+  return succeeded;
+}
+@inline function evalRuleBody(ruleId: i32): Result { // Calls the rule body through the function table, using `ruleId` as the table index.
+  return call_indirect<Result>(ruleId);
+}
+// Applies the parameterless rule `ruleId` at the current position without touching the memo table.
+// On success, returns a new nonterminal node wrapping the bindings the body produced; otherwise 0.
+export function evalApplyNoMemo0(ruleId: i32): Result {
+  const startPos = pos;
+  const firstBinding = bindings.length;
+  if (!evalRuleBody(ruleId)) {
+    return 0;
+  }
+  return newNonterminalNode(startPos, pos, ruleId, firstBinding);
+}
+export function evalApply0(ruleId: i32): Result { // Memoized application of the parameterless rule `ruleId` at the current position.
+  let result = memoTableGet(pos, ruleId);
+  if (result !== 0) { // Non-zero entry: a cached node, a recorded failure, or an LR bomb marker.
+    return useMemoizedResult(ruleId, result);
+  }
+  const origPos = pos;
+  let origNumBindings = bindings.length;
+  memoizeResult(origPos, ruleId, UNUSED_LR_BOMB); // Mark the application as in progress so a re-entrant application at origPos is detected.
+  let succeeded: i32 = evalRuleBody(ruleId);
+  // Straight failure — record a clean failure in the memo table.
+  if (!succeeded) {
+    memoizeResult(origPos, ruleId, FAIL);
+    return 0;
+  }
+  if (memoTableGet(origPos, ruleId) === USED_LR_BOMB) { // The bomb was read while evaluating the body: the rule applied itself at origPos.
+    return handleLeftRecursion(origPos, ruleId, origNumBindings);
+  }
+  // No left recursion — memoize and return.
+  result = newNonterminalNode(origPos, pos, ruleId, origNumBindings);
+  memoizeResult(origPos, ruleId, result);
+  return result;
+}
+// Grows a left-recursive match for `ruleId` that started at `origPos`.
+// Called after the rule body has succeeded once (the "seed"). Each iteration records the
+// current match as the memoized result and re-evaluates the body, which can now consume that
+// memoized result; the loop stops as soon as an attempt fails or does not get further.
+// On return, `pos` is the end of the best match and that match's node is the only binding
+// above `origNumBindings`.
+// Returns the node for the best match. This is always a success, because the seed matched.
+export function handleLeftRecursion(origPos: usize, ruleId: i32, origNumBindings: i32): Result {
+  let maxPos: i32;
+  let result: Result;
+  let succeeded: i32;
+  do {
+    // The current result is the best one -- record it.
+    maxPos = pos;
+    result = newNonterminalNode(origPos, pos, ruleId, origNumBindings);
+    memoizeResult(origPos, ruleId, result);
+    // Reset and try to improve on the current best.
+    pos = origPos;
+    bindings.length = origNumBindings;
+    succeeded = evalRuleBody(ruleId);
+  } while (succeeded && pos > maxPos);
+  // Discard whatever the last (non-improving) attempt left behind and restore the best match.
+  pos = maxPos;
+  bindings.length = origNumBindings + 1;
+  bindings[origNumBindings] = result;
+  // Return the recorded node, not the status of the last attempt: that attempt may have
+  // failed outright even though a valid match was already recorded.
+  return result;
+}
+// Applies the one-parameter rule `ruleId` with argument `arg0` at the current position.
+// On success, the bindings produced by the rule body are wrapped in a new nonterminal node
+// (which newNonterminalNode leaves on the bindings stack).
+// Parameterized rules are not memoized yet (see TODO.md).
+// Returns the rule body's status: non-zero on success, zero on failure.
+export function evalApply1(ruleId: i32, arg0: i32): Result {
+  const origPos = pos;
+  const origNumBindings = bindings.length;
+  const succeeded = call_indirect<Result>(ruleId, arg0);
+  if (succeeded) {
+    newNonterminalNode(origPos, pos, ruleId, origNumBindings);
+  }
+  return succeeded;
+}
+// Applies the two-parameter rule `ruleId` with arguments `arg0`, `arg1` at the current position.
+// On success, the bindings produced by the rule body are wrapped in a new nonterminal node
+// (which newNonterminalNode leaves on the bindings stack).
+// Parameterized rules are not memoized yet (see TODO.md).
+// Returns the rule body's status: non-zero on success, zero on failure.
+export function evalApply2(ruleId: i32, arg0: i32, arg1: i32): Result {
+  const origPos = pos;
+  const origNumBindings = bindings.length;
+  const succeeded = call_indirect<Result>(ruleId, arg0, arg1);
+  if (succeeded) {
+    newNonterminalNode(origPos, pos, ruleId, origNumBindings);
+  }
+  return succeeded;
+}
+// Applies the three-parameter rule `ruleId` with arguments `arg0`..`arg2` at the current position.
+// On success, the bindings produced by the rule body are wrapped in a new nonterminal node
+// (which newNonterminalNode leaves on the bindings stack).
+// Parameterized rules are not memoized yet (see TODO.md).
+// Returns the rule body's status: non-zero on success, zero on failure.
+export function evalApply3(ruleId: i32, arg0: i32, arg1: i32, arg2: i32): Result {
+  const origPos = pos;
+  const origNumBindings = bindings.length;
+  const succeeded = call_indirect<Result>(ruleId, arg0, arg1, arg2);
+  if (succeeded) {
+    newNonterminalNode(origPos, pos, ruleId, origNumBindings);
+  }
+  return succeeded;
+}
+// Allocates a childless CST node (type tag -1) spanning [startIdx, endIdx), pushes it onto
+// the bindings stack, and returns its address.
+export function newTerminalNode(startIdx: i32, endIdx: i32): usize {
+  const node = heap.alloc(CST_NODE_OVERHEAD);
+  const matchLength = endIdx - startIdx;
+  cstSetType(node, -1);
+  cstSetMatchLength(node, matchLength);
+  cstSetCount(node, 0);
+  bindings.push(node);
+  return node;
+}
+// Create an internal (non-leaf) node (IterationNode or NonterminalNode).
+// Builds an internal CST node whose children are all bindings above `origNumBindings`.
+// The children are copied into the node after its 12-byte header, then replaced on the
+// bindings stack by the new node itself.
+@inline function newNonLeafNodeWithType(startIdx: i32, endIdx: i32, type: i32, origNumBindings: i32): usize {
+  const end = bindings.length;
+  const childCount = end - origNumBindings;
+  const node = heap.alloc(CST_NODE_OVERHEAD + childCount * 4);
+  cstSetCount(node, childCount);
+  cstSetMatchLength(node, endIdx - startIdx);
+  cstSetType(node, type);
+  // Child pointers are stored contiguously, 4 bytes each, right after the header.
+  let slot = node + CST_NODE_OVERHEAD;
+  for (let i = origNumBindings; i < end; i++) {
+    store<i32>(slot, bindings[i]);
+    slot += 4;
+  }
+  bindings.length = origNumBindings;
+  bindings.push(node);
+  return node;
+}
+export function newNonterminalNode(startIdx: i32, endIdx: i32, ruleId: i32, origNumBindings: i32): usize { // Internal node whose type tag is the rule's id.
+  return newNonLeafNodeWithType(startIdx, endIdx, ruleId, origNumBindings);
+}
+export function newIterationNode(startIdx: i32, endIdx: i32, origNumBindings: i32): usize { // Internal node tagged NODE_TYPE_ITERATION (-2).
+  return newNonLeafNodeWithType(startIdx, endIdx, NODE_TYPE_ITERATION, origNumBindings);
+}
+export function getBindingsLength(): i32 { // Current height of the bindings stack.
+  return bindings.length;
+}
+// Truncates (or extends) the bindings stack to `len` entries.
+export function setBindingsLength(len: i32): void {
+  bindings.length = len;
+}
+export function getCstRoot(): usize { // Returns the bottom entry of the bindings stack.
+  return bindings[0];
+}

package/scripts/bundlewasm.ts ADDED Viewed

@@ -0,0 +1,49 @@
+import * as fs from 'node:fs';
+import {URL} from 'node:url';
+import * as w from '@wasmgroundup/emit';
+import {extractSections} from './modparse.ts';
+/*
+  Reads a compiled Wasm module (the AssemblyScript release build), extracts its
+  sections via extractSections(), and writes them as a generated .ts module next
+  to the input file (`<input>_sections.ts`). Section contents are embedded as
+  base64 and decoded when the generated module is loaded.
+
+  Usage: node scripts/bundlewasm.ts <path/to/module.wasm>
+*/
+const inputPath = process.argv[2];
+if (!inputPath) {
+  // Fail with a usage message rather than a cryptic readFileSync(undefined) error.
+  console.error('Usage: node scripts/bundlewasm.ts <path/to/module.wasm>');
+  process.exit(1);
+}
+const outputPath = inputPath + '_sections.ts';
+const destImportCount = 3;
+const buf = fs.readFileSync(inputPath);
+const sections = extractSections(buf, {
+  destImportCount
+});
+// Prologue of the generated module: the base64 decoder plus the scalar exports.
+let output = `function decodeBase64(str: string) {
+  const bytes = atob(str)
+  const result: number[] = []
+  for (let i = 0; i < bytes.length; i++) {
+    result[i] = bytes.charCodeAt(i)
+  }
+  return result
+}
+export const destImportCount = ${destImportCount};
+export const startFuncidx = ${sections.startFuncidx};
+`;
+output += `export const funcidxByName = ${JSON.stringify(sections.funcidxByName)};\n`;
+// Every remaining entry is emitted as an {entryCount, contents} export named after its key.
+for (const [secName, payload] of Object.entries(sections)) {
+  if (secName === 'funcidxByName' || secName === 'startFuncidx') {
+    continue; // Not a section payload; already emitted above.
+  }
+  const {entryCount, contents} = payload;
+  const base64Contents = Buffer.from(contents).toString('base64');
+  output += `export const ${secName} = {
+  entryCount: ${JSON.stringify(entryCount)},
+  contents: decodeBase64(${JSON.stringify(base64Contents)})
+}
+`;
+}
+fs.writeFileSync(outputPath, output, 'utf8');