@ohm-js/wasm 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mise.toml +2 -0
- package/AGENT.md +25 -0
- package/LICENSE +21 -0
- package/Makefile +23 -0
- package/README.md +34 -0
- package/TODO.md +28 -0
- package/package.json +32 -0
- package/runtime/ohmRuntime.ts +252 -0
- package/scripts/bundlewasm.ts +49 -0
- package/scripts/modparse.ts +397 -0
- package/src/cli.js +36 -0
- package/src/index.js +1195 -0
- package/test/data/_book-review.liquid +257 -0
- package/test/data/_es5.js +1057 -0
- package/test/data/_es5.wasm +0 -0
- package/test/data/_html5shiv-3.7.3.js +326 -0
- package/test/data/_liquid-html.ohm +605 -0
- package/test/go/README.md +67 -0
- package/test/go/cst.go +164 -0
- package/test/go/go.mod +5 -0
- package/test/go/go.sum +2 -0
- package/test/go/matcher.go +370 -0
- package/test/go/testmain.go +161 -0
- package/test/test-es5.js +104 -0
- package/test/test-liquid-html.js +27 -0
- package/test/test-wasm.js +764 -0
package/test/go/cst.go
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
package main
|
|
2
|
+
|
|
3
|
+
import (
|
|
4
|
+
"fmt"
|
|
5
|
+
"unsafe"
|
|
6
|
+
|
|
7
|
+
"github.com/tetratelabs/wazero/api"
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
// Code for walking a CST by hand, by accessing the raw memory.
|
|
11
|
+
// Ultimately, we will want a higher-level API for this, but for now,
|
|
12
|
+
// this is useful for testing/debugging.
|
|
13
|
+
|
|
14
|
+
// Node type tags as returned by CstNode.Type.
const (
	NodeTypeNonterminal = 0
	NodeTypeTerminal    = -1
	NodeTypeIter        = -2
)

// Byte sizes used when addressing a CST node in linear memory.
const (
	uint32Size = 4
	// The header is three uint32 fields: count, matchLen, type.
	cstNodeHeaderSize = 3 * uint32Size
)
|
|
24
|
+
|
|
25
|
+
// CstNode represents a node in the Concrete Syntax Tree
|
|
26
|
+
type CstNode struct {
|
|
27
|
+
ruleNames []string
|
|
28
|
+
memory api.Memory
|
|
29
|
+
base uint32
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
// NewCstNode creates a new CstNode with the given parameters
|
|
33
|
+
func NewCstNode(ruleNames []string, memory api.Memory, offset uint32) *CstNode {
|
|
34
|
+
return &CstNode{
|
|
35
|
+
ruleNames: ruleNames,
|
|
36
|
+
memory: memory,
|
|
37
|
+
base: offset,
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// IsNonterminal returns true if this node represents a nonterminal
|
|
42
|
+
func (n *CstNode) IsNonterminal() bool {
|
|
43
|
+
return n.Type() >= 0
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// IsTerminal returns true if this node represents a terminal
|
|
47
|
+
func (n *CstNode) IsTerminal() bool {
|
|
48
|
+
return n.Type() == NodeTypeTerminal
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// IsIter returns true if this node represents an iteration
|
|
52
|
+
func (n *CstNode) IsIter() bool {
|
|
53
|
+
return n.Type() == NodeTypeIter
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// RuleName returns the name of the rule that created this node
|
|
57
|
+
func (n *CstNode) RuleName() (string, error) {
|
|
58
|
+
data, ok := n.memory.Read(n.base+8, 4)
|
|
59
|
+
if !ok {
|
|
60
|
+
return "", fmt.Errorf("failed to read rule ID at address %d", n.base+8)
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
id := readInt32(data, 0)
|
|
64
|
+
if id < 0 {
|
|
65
|
+
return "", nil
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
if len(n.ruleNames) == 0 {
|
|
69
|
+
return fmt.Sprintf("rule_%d", id), nil
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
if int(id) >= len(n.ruleNames) {
|
|
73
|
+
return fmt.Sprintf("rule_%d", id), nil
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
if n.ruleNames[id] == "" {
|
|
77
|
+
return fmt.Sprintf("rule_%d", id), nil
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
return n.ruleNames[id], nil
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// count returns the number of child nodes
|
|
84
|
+
func (n *CstNode) count() (uint32, error) {
|
|
85
|
+
data, ok := n.memory.Read(n.base, 4)
|
|
86
|
+
if !ok {
|
|
87
|
+
return 0, fmt.Errorf("failed to read count at address %d", n.base)
|
|
88
|
+
}
|
|
89
|
+
return readUint32(data, 0), nil
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// MatchLength returns the length of the matched text
|
|
93
|
+
func (n *CstNode) MatchLength() (uint32, error) {
|
|
94
|
+
data, ok := n.memory.Read(n.base+4, 4)
|
|
95
|
+
if !ok {
|
|
96
|
+
return 0, fmt.Errorf("failed to read match length at address %d", n.base+4)
|
|
97
|
+
}
|
|
98
|
+
return readUint32(data, 0), nil
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// Type returns the type of this node (0 for nonterminal, -1 for terminal, -2 for iter)
|
|
102
|
+
func (n *CstNode) Type() int32 {
|
|
103
|
+
data, ok := n.memory.Read(n.base+8, 4)
|
|
104
|
+
if !ok {
|
|
105
|
+
return 0 // Return nonterminal as default in case of error
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
t := readInt32(data, 0)
|
|
109
|
+
if t < 0 {
|
|
110
|
+
return t
|
|
111
|
+
}
|
|
112
|
+
return NodeTypeNonterminal
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// Children returns a slice of child nodes
|
|
116
|
+
func (n *CstNode) Children() ([]*CstNode, error) {
|
|
117
|
+
count, err := n.count()
|
|
118
|
+
if err != nil {
|
|
119
|
+
return nil, err
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
if count == 0 {
|
|
123
|
+
return []*CstNode{}, nil
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
children := make([]*CstNode, count)
|
|
127
|
+
|
|
128
|
+
for i := uint32(0); i < count; i++ {
|
|
129
|
+
slotOffset := n.base + cstNodeHeaderSize + i*uint32Size
|
|
130
|
+
data, ok := n.memory.Read(slotOffset, 4)
|
|
131
|
+
if !ok {
|
|
132
|
+
return nil, fmt.Errorf("failed to read child pointer at address %d", slotOffset)
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
childOffset := readUint32(data, 0)
|
|
136
|
+
children[i] = NewCstNode(n.ruleNames, n.memory, childOffset)
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
return children, nil
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Helper functions for reading values from memory

// readUint32 returns the 4 bytes of data starting at offset reinterpreted as
// a uint32 in the host's native byte order.
//
// It panics when fewer than 4 bytes are available at offset.
//
// NOTE(review): WebAssembly linear memory is little-endian, so the result is
// only correct on little-endian hosts — confirm that is acceptable.
func readUint32(data []byte, offset uint32) uint32 {
	// &data[offset] alone bounds-checks only the first byte; indexing the
	// last byte of the window makes a short slice panic instead of letting
	// the pointer cast read past its end.
	_ = data[offset:][3]
	return *(*uint32)(unsafe.Pointer(&data[offset]))
}
|
|
146
|
+
|
|
147
|
+
// readInt32 returns the 4 bytes of data starting at offset reinterpreted as
// an int32 in the host's native byte order.
//
// It panics when fewer than 4 bytes are available at offset.
//
// NOTE(review): WebAssembly linear memory is little-endian, so the result is
// only correct on little-endian hosts — confirm that is acceptable.
func readInt32(data []byte, offset uint32) int32 {
	// &data[offset] alone bounds-checks only the first byte; indexing the
	// last byte of the window makes a short slice panic instead of letting
	// the pointer cast read past its end.
	_ = data[offset:][3]
	return *(*int32)(unsafe.Pointer(&data[offset]))
}
|
|
150
|
+
|
|
151
|
+
// GetCstRoot returns the root node of the CST from the WasmMatcher
|
|
152
|
+
func GetCstRoot(matcher *WasmMatcher, ruleNames []string) (*CstNode, error) {
|
|
153
|
+
rootAddr, err := matcher.GetCstRoot()
|
|
154
|
+
if err != nil {
|
|
155
|
+
return nil, fmt.Errorf("failed to get CST root: %v", err)
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
memory := matcher.GetModule().Memory()
|
|
159
|
+
if memory == nil {
|
|
160
|
+
return nil, fmt.Errorf("WebAssembly module has no memory")
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
return NewCstNode(ruleNames, memory, rootAddr), nil
|
|
164
|
+
}
|
package/test/go/go.mod
ADDED
package/test/go/go.sum
ADDED
package/test/go/matcher.go
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
package main
|
|
2
|
+
|
|
3
|
+
import (
|
|
4
|
+
"context"
|
|
5
|
+
"encoding/binary"
|
|
6
|
+
"fmt"
|
|
7
|
+
"io"
|
|
8
|
+
"os"
|
|
9
|
+
|
|
10
|
+
"github.com/tetratelabs/wazero"
|
|
11
|
+
"github.com/tetratelabs/wazero/api"
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
// Layout of the module's linear memory, in bytes.
const (
	// wasmPageSize is 64 KiB.
	wasmPageSize = 64 << 10

	// The input buffer occupies exactly one page, starting one page in.
	InputBufferOffset = wasmPageSize
	InputBufferSize   = wasmPageSize

	// MemoTableOffset is the first address after the input buffer.
	// NOTE(review): presumably where the memo table begins; it is not
	// referenced elsewhere in this file — confirm against the runtime.
	MemoTableOffset = InputBufferOffset + InputBufferSize
)
|
|
21
|
+
|
|
22
|
+
// WasmMatcher is a Go implementation of the JavaScript WasmMatcher class for Ohm
|
|
23
|
+
type WasmMatcher struct {
|
|
24
|
+
runtime wazero.Runtime
|
|
25
|
+
module api.Module
|
|
26
|
+
input string
|
|
27
|
+
pos int
|
|
28
|
+
ctx context.Context
|
|
29
|
+
ruleIds map[string]int
|
|
30
|
+
defaultStartRule string
|
|
31
|
+
lastMatchResult bool
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
// GetModule returns the WebAssembly module
|
|
35
|
+
func (m *WasmMatcher) GetModule() api.Module {
|
|
36
|
+
return m.module
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
func NewWasmMatcher(ctx context.Context) *WasmMatcher {
|
|
40
|
+
// Create a new runtime with custom sections enabled
|
|
41
|
+
config := wazero.NewRuntimeConfig().WithCustomSections(true)
|
|
42
|
+
|
|
43
|
+
return &WasmMatcher{
|
|
44
|
+
runtime: wazero.NewRuntimeWithConfig(ctx, config),
|
|
45
|
+
ctx: ctx,
|
|
46
|
+
ruleIds: make(map[string]int),
|
|
47
|
+
pos: 0,
|
|
48
|
+
lastMatchResult: false,
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// parseRuleNames decodes the payload of the "ruleNames" custom section.
//
// The payload is a WebAssembly vector of names: an unsigned LEB128 count
// followed by that many entries, each an unsigned LEB128 byte length and then
// the name's bytes. Bytes after the last entry are ignored.
//
// It returns an error when data is empty, when a count or length cannot be
// decoded or exceeds the uint32 range, or when data ends before every
// declared name has been read. Decoding failures wrap io.ErrUnexpectedEOF.
func parseRuleNames(data []byte) ([]string, error) {
	if len(data) == 0 {
		return nil, fmt.Errorf("empty custom section data")
	}

	const maxUint32 = uint64(^uint32(0))

	count, n := binary.Uvarint(data)
	if n <= 0 {
		return nil, fmt.Errorf("failed to read number of names: %w", io.ErrUnexpectedEOF)
	}
	if count > maxUint32 {
		return nil, fmt.Errorf("number of names exceeds maximum uint32 value")
	}
	data = data[n:]

	// Every entry takes at least one byte (its length prefix), so a count
	// larger than the remaining payload is malformed. Rejecting it here keeps
	// an untrusted count from sizing the allocation below.
	if count > uint64(len(data)) {
		return nil, fmt.Errorf("buffer too small to hold %d names", count)
	}

	names := make([]string, 0, int(count))
	for i := uint64(0); i < count; i++ {
		nameLen, n := binary.Uvarint(data)
		if n <= 0 {
			return nil, fmt.Errorf("failed to read name length: %w", io.ErrUnexpectedEOF)
		}
		if nameLen > maxUint32 {
			return nil, fmt.Errorf("name length exceeds maximum uint32 value")
		}
		data = data[n:]

		if uint64(len(data)) < nameLen {
			return nil, fmt.Errorf("buffer too small to read name bytes")
		}
		names = append(names, string(data[:nameLen]))
		data = data[nameLen:]
	}

	return names, nil
}
|
|
105
|
+
|
|
106
|
+
func (m *WasmMatcher) LoadModule(wasmPath string) error {
|
|
107
|
+
// Read the WASM file
|
|
108
|
+
wasmBytes, err := os.ReadFile(wasmPath)
|
|
109
|
+
if err != nil {
|
|
110
|
+
return fmt.Errorf("error reading WASM file: %v", err)
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// Create the env module with required host functions
|
|
114
|
+
_, err = m.runtime.NewHostModuleBuilder("env").
|
|
115
|
+
NewFunctionBuilder().
|
|
116
|
+
WithFunc(func(a, b, c, d int32) {
|
|
117
|
+
panic("WebAssembly module aborted execution")
|
|
118
|
+
}).
|
|
119
|
+
Export("abort").
|
|
120
|
+
NewFunctionBuilder().
|
|
121
|
+
WithFunc(func(val uint32) {
|
|
122
|
+
fmt.Printf("WASM debug: %d\n", val)
|
|
123
|
+
}).
|
|
124
|
+
Export("printI32").
|
|
125
|
+
NewFunctionBuilder().
|
|
126
|
+
WithFunc(func(ctx context.Context, mod api.Module, offset, maxLen uint32) uint32 {
|
|
127
|
+
return m.fillInputBuffer(ctx, mod, offset, maxLen)
|
|
128
|
+
}).
|
|
129
|
+
Export("fillInputBuffer").
|
|
130
|
+
Instantiate(m.ctx)
|
|
131
|
+
|
|
132
|
+
if err != nil {
|
|
133
|
+
return fmt.Errorf("failed to create host module: %v", err)
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// First compile the module to access the custom sections
|
|
137
|
+
compiledModule, err := m.runtime.CompileModule(m.ctx, wasmBytes)
|
|
138
|
+
if err != nil {
|
|
139
|
+
return fmt.Errorf("error compiling module: %v", err)
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Get all custom sections from the module
|
|
143
|
+
customSections := compiledModule.CustomSections()
|
|
144
|
+
if customSections == nil {
|
|
145
|
+
return fmt.Errorf("no custom sections found in module")
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
var ruleNamesSection api.CustomSection
|
|
149
|
+
for _, section := range customSections {
|
|
150
|
+
if section.Name() == "ruleNames" {
|
|
151
|
+
ruleNamesSection = section
|
|
152
|
+
break
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
if ruleNamesSection == nil {
|
|
157
|
+
return fmt.Errorf("required custom section 'ruleNames' not found")
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
// Parse rule names from the custom section data
|
|
161
|
+
ruleNames, err := parseRuleNames(ruleNamesSection.Data())
|
|
162
|
+
if err != nil {
|
|
163
|
+
return fmt.Errorf("failed to parse rule names from custom section: %v", err)
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
// Now instantiate the module
|
|
167
|
+
m.module, err = m.runtime.InstantiateModule(m.ctx, compiledModule, wazero.NewModuleConfig())
|
|
168
|
+
if err != nil {
|
|
169
|
+
return fmt.Errorf("error instantiating module: %v", err)
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
// Build the ruleIds map (mapping from name to index)
|
|
173
|
+
m.ruleIds = make(map[string]int, len(ruleNames))
|
|
174
|
+
for i, name := range ruleNames {
|
|
175
|
+
m.ruleIds[name] = i
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
// Set the default start rule to the first rule
|
|
179
|
+
if len(ruleNames) > 0 {
|
|
180
|
+
m.defaultStartRule = ruleNames[0]
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
return nil
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
func (m *WasmMatcher) SetInput(input string) {
|
|
187
|
+
if m.input != input {
|
|
188
|
+
m.input = input
|
|
189
|
+
m.pos = 0
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
// SetInputFromFile reads input from a file and sets it as the current input
|
|
194
|
+
func (m *WasmMatcher) SetInputFromFile(filePath string) error {
|
|
195
|
+
data, err := os.ReadFile(filePath)
|
|
196
|
+
if err != nil {
|
|
197
|
+
return fmt.Errorf("error reading input file: %v", err)
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
m.SetInput(string(data))
|
|
201
|
+
return nil
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
func (m *WasmMatcher) GetInput() string {
|
|
205
|
+
return m.input
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
func (m *WasmMatcher) Match() (bool, error) {
|
|
209
|
+
result, err := m.MatchRule(m.defaultStartRule)
|
|
210
|
+
if err == nil {
|
|
211
|
+
m.lastMatchResult = result
|
|
212
|
+
}
|
|
213
|
+
return result, err
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
func (m *WasmMatcher) MatchRule(ruleName string) (bool, error) {
|
|
217
|
+
m.pos = 0 // Reset position
|
|
218
|
+
|
|
219
|
+
// Get the rule ID
|
|
220
|
+
ruleId := uint64(0) // Default to 0 (start rule)
|
|
221
|
+
if ruleName != "" {
|
|
222
|
+
if id, ok := m.ruleIds[ruleName]; ok {
|
|
223
|
+
ruleId = uint64(id)
|
|
224
|
+
} else {
|
|
225
|
+
return false, fmt.Errorf("rule not found: %s", ruleName)
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// Call the match function
|
|
230
|
+
matchFunc := m.module.ExportedFunction("match")
|
|
231
|
+
if matchFunc == nil {
|
|
232
|
+
return false, fmt.Errorf("match function not exported by module")
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
results, err := matchFunc.Call(m.ctx, ruleId)
|
|
236
|
+
if err != nil {
|
|
237
|
+
return false, fmt.Errorf("error calling match function: %v", err)
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
// Non-zero result means success
|
|
241
|
+
result := results[0] != 0
|
|
242
|
+
m.lastMatchResult = result
|
|
243
|
+
return result, nil
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
// GetCstRoot returns the root node address of the concrete syntax tree
|
|
247
|
+
func (m *WasmMatcher) GetCstRoot() (uint32, error) {
|
|
248
|
+
getCstRootFunc := m.module.ExportedFunction("getCstRoot")
|
|
249
|
+
if getCstRootFunc == nil {
|
|
250
|
+
return 0, fmt.Errorf("getCstRoot function not exported by module")
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
results, err := getCstRootFunc.Call(m.ctx)
|
|
254
|
+
if err != nil {
|
|
255
|
+
return 0, fmt.Errorf("error getting CST root: %v", err)
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
return uint32(results[0]), nil
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
// GetCstNode returns a CstNode object for the current parse tree
|
|
262
|
+
func (m *WasmMatcher) GetCstNode() (*CstNode, error) {
|
|
263
|
+
rootAddr, err := m.GetCstRoot()
|
|
264
|
+
if err != nil {
|
|
265
|
+
return nil, fmt.Errorf("failed to get CST root: %v", err)
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
memory := m.module.Memory()
|
|
269
|
+
if memory == nil {
|
|
270
|
+
return nil, fmt.Errorf("WebAssembly module has no memory")
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
return NewCstNode(m.GetRuleNames(), memory, rootAddr), nil
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
// GetRuleNames returns the list of rule names in the grammar
|
|
277
|
+
func (m *WasmMatcher) GetRuleNames() []string {
|
|
278
|
+
// If we have real rule names, use them
|
|
279
|
+
if len(m.ruleIds) > 0 {
|
|
280
|
+
// Convert the rule IDs map to a slice of rule names
|
|
281
|
+
maxID := 0
|
|
282
|
+
for _, id := range m.ruleIds {
|
|
283
|
+
if id > maxID {
|
|
284
|
+
maxID = id
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
ruleNames := make([]string, maxID+1)
|
|
289
|
+
for name, id := range m.ruleIds {
|
|
290
|
+
ruleNames[id] = name
|
|
291
|
+
}
|
|
292
|
+
return ruleNames
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
// If we don't have rule names yet, create placeholder names for ES5 grammar
|
|
296
|
+
// This is just a fallback to make tests work
|
|
297
|
+
const expectedRuleCount = 100 // More than enough for most grammars
|
|
298
|
+
ruleNames := make([]string, expectedRuleCount)
|
|
299
|
+
ruleNames[0] = "Program" // Common name for the start rule
|
|
300
|
+
|
|
301
|
+
for i := 1; i < expectedRuleCount; i++ {
|
|
302
|
+
ruleNames[i] = fmt.Sprintf("rule_%d", i)
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
return ruleNames
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
// fillInputBuffer is called by the WebAssembly module to get more input
|
|
309
|
+
func (m *WasmMatcher) fillInputBuffer(ctx context.Context, mod api.Module, offset, maxLen uint32) uint32 {
|
|
310
|
+
// Determine how much of the input to copy
|
|
311
|
+
remaining := m.input[m.pos:]
|
|
312
|
+
bytesToWrite := len(remaining)
|
|
313
|
+
if bytesToWrite > int(maxLen) {
|
|
314
|
+
bytesToWrite = int(maxLen)
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
// Get module memory
|
|
318
|
+
memory := mod.Memory()
|
|
319
|
+
if memory == nil {
|
|
320
|
+
panic("WebAssembly module has no memory")
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
// Write to WebAssembly memory
|
|
324
|
+
if bytesToWrite > 0 {
|
|
325
|
+
// Create a buffer with our input data
|
|
326
|
+
inputData := []byte(remaining[:bytesToWrite])
|
|
327
|
+
|
|
328
|
+
// Write directly to WebAssembly memory
|
|
329
|
+
memory.Write(InputBufferOffset+offset, inputData)
|
|
330
|
+
|
|
331
|
+
// Mark end of input with 0xFF (invalid UTF-8 character)
|
|
332
|
+
memory.WriteByte(InputBufferOffset+offset+uint32(bytesToWrite), 0xFF)
|
|
333
|
+
|
|
334
|
+
// Update position
|
|
335
|
+
m.pos += bytesToWrite
|
|
336
|
+
} else {
|
|
337
|
+
// No more input, just write the terminator
|
|
338
|
+
memory.WriteByte(InputBufferOffset+offset, 0xFF)
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
return uint32(bytesToWrite)
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
// GetRuleId returns the ID for a rule name
|
|
345
|
+
func (m *WasmMatcher) GetRuleId(ruleName string) (int, bool) {
|
|
346
|
+
id, ok := m.ruleIds[ruleName]
|
|
347
|
+
return id, ok
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
// SetDefaultStartRule sets the default start rule for matching
|
|
351
|
+
func (m *WasmMatcher) SetDefaultStartRule(ruleName string) {
|
|
352
|
+
m.defaultStartRule = ruleName
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
// Close releases all resources
|
|
356
|
+
func (m *WasmMatcher) Close() error {
|
|
357
|
+
if m.module != nil {
|
|
358
|
+
if err := m.module.Close(m.ctx); err != nil {
|
|
359
|
+
return err
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
if m.runtime != nil {
|
|
364
|
+
if err := m.runtime.Close(m.ctx); err != nil {
|
|
365
|
+
return err
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
return nil
|
|
370
|
+
}
|
|
package/test/go/testmain.go
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
package main
|
|
2
|
+
|
|
3
|
+
import (
|
|
4
|
+
"context"
|
|
5
|
+
"flag"
|
|
6
|
+
"fmt"
|
|
7
|
+
"os"
|
|
8
|
+
"path/filepath"
|
|
9
|
+
"strings"
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
func main() {
|
|
13
|
+
fmt.Println("Ohm WebAssembly Matcher - Go Implementation")
|
|
14
|
+
// Parse command line arguments
|
|
15
|
+
wasmFile := flag.String("wasm", "test/data/_add.wasm", "Path to WebAssembly file")
|
|
16
|
+
inputText := flag.String("input", "", "Input text to match against the grammar")
|
|
17
|
+
inputFile := flag.String("file", "", "Path to file containing input text to match")
|
|
18
|
+
startRule := flag.String("rule", "", "Start rule for the grammar (defaults to grammar's start rule)")
|
|
19
|
+
verbose := flag.Bool("verbose", false, "Display verbose information about CST nodes")
|
|
20
|
+
flag.Parse()
|
|
21
|
+
|
|
22
|
+
// Create a context
|
|
23
|
+
ctx := context.Background()
|
|
24
|
+
|
|
25
|
+
// Create a new WasmMatcher
|
|
26
|
+
matcher := NewWasmMatcher(ctx)
|
|
27
|
+
defer matcher.Close()
|
|
28
|
+
|
|
29
|
+
// Load the WebAssembly module
|
|
30
|
+
wasmPath := *wasmFile
|
|
31
|
+
err := matcher.LoadModule(wasmPath)
|
|
32
|
+
if err != nil {
|
|
33
|
+
fmt.Printf("Error loading module: %v\n", err)
|
|
34
|
+
os.Exit(1)
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
fmt.Printf("Loaded WebAssembly module: %s\n", filepath.Base(wasmPath))
|
|
38
|
+
|
|
39
|
+
// Set the input text - either from direct text or from file
|
|
40
|
+
if *inputFile != "" {
|
|
41
|
+
// Read input from file
|
|
42
|
+
err = matcher.SetInputFromFile(*inputFile)
|
|
43
|
+
if err != nil {
|
|
44
|
+
fmt.Printf("Error reading input file: %v\n", err)
|
|
45
|
+
os.Exit(1)
|
|
46
|
+
}
|
|
47
|
+
} else if *inputText != "" {
|
|
48
|
+
// Set the input text directly
|
|
49
|
+
matcher.SetInput(*inputText)
|
|
50
|
+
} else {
|
|
51
|
+
fmt.Println("No input provided. Use -input flag to provide text or -file to specify an input file.")
|
|
52
|
+
os.Exit(0)
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// If a start rule was specified, set it
|
|
56
|
+
if *startRule != "" {
|
|
57
|
+
fmt.Printf("Using rule: %s\n", *startRule)
|
|
58
|
+
matcher.SetDefaultStartRule(*startRule)
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Attempt to match the input
|
|
62
|
+
if *inputFile != "" {
|
|
63
|
+
fmt.Printf("Matching input file: %s\n", *inputFile)
|
|
64
|
+
} else {
|
|
65
|
+
fmt.Printf("Matching input: %q\n", matcher.GetInput())
|
|
66
|
+
}
|
|
67
|
+
success, err := matcher.Match()
|
|
68
|
+
if err != nil {
|
|
69
|
+
fmt.Printf("Error during matching: %v\n", err)
|
|
70
|
+
os.Exit(1)
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
if success {
|
|
74
|
+
fmt.Println("Match succeeded")
|
|
75
|
+
|
|
76
|
+
// Try to get the CST root
|
|
77
|
+
cstRoot, err := matcher.GetCstRoot()
|
|
78
|
+
if err != nil {
|
|
79
|
+
fmt.Printf("Error getting CST root: %v\n", err)
|
|
80
|
+
} else {
|
|
81
|
+
if *verbose {
|
|
82
|
+
fmt.Printf("CST root node ID: %d\n", cstRoot)
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// Create a CST node from the root address
|
|
86
|
+
ruleNames := matcher.GetRuleNames()
|
|
87
|
+
node := NewCstNode(ruleNames, matcher.GetModule().Memory(), cstRoot)
|
|
88
|
+
|
|
89
|
+
nodeType := node.Type()
|
|
90
|
+
if *verbose {
|
|
91
|
+
fmt.Printf("CST Node - Type: %d\n", nodeType)
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// Unparse the CST to get the original text
|
|
95
|
+
unparsedText := unparse(node, matcher.GetInput())
|
|
96
|
+
if unparsedText == matcher.GetInput() {
|
|
97
|
+
fmt.Println("Unparsed text matches input")
|
|
98
|
+
} else {
|
|
99
|
+
fmt.Println("ERROR: Unparsed text does not match input")
|
|
100
|
+
fmt.Printf("Unparsed text: %q\n", unparsedText)
|
|
101
|
+
fmt.Printf("Original input: %q\n", matcher.GetInput())
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
// Display verbose information about node types if requested
|
|
105
|
+
if *verbose {
|
|
106
|
+
fmt.Println("\nNode Types:")
|
|
107
|
+
fmt.Printf(" - Terminal nodes (type %d): Leaf nodes that consume input\n", NodeTypeTerminal)
|
|
108
|
+
fmt.Printf(" - Iteration nodes (type %d): Used for repetition operations\n", NodeTypeIter)
|
|
109
|
+
fmt.Printf(" - Non-terminal nodes (type %d): Internal nodes with children\n", NodeTypeNonterminal)
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
} else {
|
|
113
|
+
fmt.Println("Match failed")
|
|
114
|
+
os.Exit(1)
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// unparse walks the CST starting from the given node and reconstructs the original text
|
|
119
|
+
// It returns the reconstructed text from the terminal nodes
|
|
120
|
+
func unparse(node *CstNode, input string) string {
|
|
121
|
+
var result strings.Builder
|
|
122
|
+
pos := uint32(0)
|
|
123
|
+
unparseNode(node, &pos, input, &result)
|
|
124
|
+
return result.String()
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
// unparseNode is a helper function that recursively processes nodes and builds the result
|
|
128
|
+
func unparseNode(node *CstNode, pos *uint32, input string, result *strings.Builder) {
|
|
129
|
+
// Handle terminal nodes - append the consumed text to the result
|
|
130
|
+
if node.IsTerminal() {
|
|
131
|
+
matchLen, err := node.MatchLength()
|
|
132
|
+
if err != nil {
|
|
133
|
+
fmt.Printf("Error getting match length: %v\n", err)
|
|
134
|
+
return
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
if *pos < uint32(len(input)) && matchLen > 0 {
|
|
138
|
+
end := *pos + matchLen
|
|
139
|
+
if end > uint32(len(input)) {
|
|
140
|
+
end = uint32(len(input))
|
|
141
|
+
}
|
|
142
|
+
matchedText := input[*pos:end]
|
|
143
|
+
result.WriteString(matchedText)
|
|
144
|
+
|
|
145
|
+
// Update position only after processing terminal nodes
|
|
146
|
+
*pos += matchLen
|
|
147
|
+
}
|
|
148
|
+
return
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
// For all other node types (nonterminal, iteration, etc.), process children recursively
|
|
152
|
+
children, err := node.Children()
|
|
153
|
+
if err != nil {
|
|
154
|
+
fmt.Printf("Error getting children: %v\n", err)
|
|
155
|
+
return
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
for _, child := range children {
|
|
159
|
+
unparseNode(child, pos, input, result)
|
|
160
|
+
}
|
|
161
|
+
}
|