hammer-parser 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +101 -0
- data/lib/hammer-parser.rb +47 -0
- data/lib/hammer/internal.rb +346 -0
- data/lib/hammer/parser.rb +224 -0
- data/lib/hammer/parser_builder.rb +124 -0
- data/lib/minitest/hamer-parser_plugin.rb +31 -0
- data/test/autogen_test.rb +755 -0
- data/test/parser_test.rb +132 -0
- metadata +83 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b813026f979c544803273ae966bd9f1858e4f510
|
4
|
+
data.tar.gz: b48b3e11a02fc19be47139330a2a14e7c91558f6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d33df5b0e64a59baf7d27ce600edfca7eabf6055fea496b4df5ac148cf036183d88959ac3c3a7d7bc56f23273e132d2409b308cc61177925536a914f7e317b16
|
7
|
+
data.tar.gz: 147863e19142b07d561f44f9ccd6b994d8553c7cd2ef7cf5b6d03b7507ffaa95d0aa76fc63d3d3d438397a599bcee124fbb691478548de7acc7d3765f8a72ca3
|
data/README.md
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
# hammer-parser
|
2
|
+
|
3
|
+
Ruby bindings for [hammer](https://github.com/UpstandingHackers/hammer), a parsing library.
|
4
|
+
|
5
|
+
|
6
|
+
## Notes
|
7
|
+
|
8
|
+
* I called the gem `hammer-parser`, since there already is a [gem named `hammer`](https://rubygems.org/gems/hammer).
|
9
|
+
|
10
|
+
|
11
|
+
## Development
|
12
|
+
|
13
|
+
1. `cd src/bindings/ruby`.
|
14
|
+
|
15
|
+
2. Run `bundle install` to install dependencies.
|
16
|
+
|
17
|
+
3. Run `bundle console` to open `irb` with hammer loaded.
|
18
|
+
|
19
|
+
4. To run tests, just run `bundle exec rake test`.
|
20
|
+
|
21
|
+
|
22
|
+
## Installation
|
23
|
+
|
24
|
+
1. Download the hammer source code, then build and install it system-wide together with the Ruby bindings.
|
25
|
+
|
26
|
+
`git clone https://github.com/UpstandingHackers/hammer`
|
27
|
+
|
28
|
+
`cd hammer`
|
29
|
+
|
30
|
+
`scons bindings=ruby`
|
31
|
+
|
32
|
+
`sudo scons bindings=ruby install`
|
33
|
+
|
34
|
+
2. On Linux, you will also have to run
|
35
|
+
|
36
|
+
`sudo ldconfig`
|
37
|
+
|
38
|
+
3. Build the gem
|
39
|
+
`gem build hammer-parser.gemspec`
|
40
|
+
|
41
|
+
4. Install the gem
|
42
|
+
`gem install hammer-parser-x.x.x.gem`
|
43
|
+
|
44
|
+
|
45
|
+
## Examples
|
46
|
+
|
47
|
+
Add hammer to your Gemfile.
|
48
|
+
|
49
|
+
`gem 'hammer-parser'`
|
50
|
+
|
51
|
+
Use hammer in your project.
|
52
|
+
|
53
|
+
`require 'hammer-parser'`
|
54
|
+
|
55
|
+
### Building a parser
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
parser = Hammer::Parser.build {
|
59
|
+
token 'Hello '
|
60
|
+
choice {
|
61
|
+
token 'Mom'
|
62
|
+
token 'Dad'
|
63
|
+
}
|
64
|
+
token '!'
|
65
|
+
}
|
66
|
+
```
|
67
|
+
|
68
|
+
Also possible:
|
69
|
+
|
70
|
+
```ruby
|
71
|
+
parser = Hammer::ParserBuilder.new
|
72
|
+
.token('Hello ')
|
73
|
+
.choice(Hammer::Parser.token('Mom'), Hammer::Parser.token('Dad'))
|
74
|
+
.token('!')
|
75
|
+
.build
|
76
|
+
```
|
77
|
+
|
78
|
+
More like hammer in C:
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
h = Hammer::Parser
|
82
|
+
parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), h.token('!'))
|
83
|
+
```
|
84
|
+
|
85
|
+
### Parsing
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
result = parser.parse 'Hello Mom!'
|
89
|
+
=> #<HParseResult>
|
90
|
+
result = parser.parse 'Hello Someone!'
|
91
|
+
=> nil
|
92
|
+
```
|
93
|
+
|
94
|
+
The `parse` method returns an `HParseResult` object, which needs to be
|
95
|
+
kept around until you're entirely done with the parse tree, which can
|
96
|
+
be accessed with `result.ast`.
|
97
|
+
|
98
|
+
While the AST can be accessed using the same interface as the C
|
99
|
+
HParsedToken type, we recommend using `result.ast.unmarshal` instead.
|
100
|
+
This converts the entire parse tree into a standalone Ruby-native
|
101
|
+
data structure which will likely be much easier to work with.
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'hammer/internal'
|
2
|
+
require 'hammer/parser'
|
3
|
+
require 'hammer/parser_builder'
|
4
|
+
|
5
|
+
# Demo code that runs when this file is required. It is left in for now so
# that an HParseResult is available (as $r) to play around with in irb.

# First demo: a built parser containing an indirect parser that is bound
# only after the build block has finished.
placeholder = nil
parser = Hammer::Parser.build do
  token 'abc'
  placeholder = indirect
  end_p
end
placeholder.bind(Hammer::Parser.token('abd'))

#$p = parser
$r = parser.parse('abcabd')

#p $r[:ast][:data][:seq].elements.map {|e| e[:data][:bytes].token }


# Second demo: an action block that doubles the data of each uint8 result.
h = Hammer::Parser
doubled_byte = h.action(h.uint8) do |r|
  #p "TT=#{r[:ast][:token_type]}, value=#{r[:ast][:data][:uint]}"
  r.data * 2
end
parser = h.many(doubled_byte)

#parser = Hammer::Parser.build {
#  many {
#    uint8
#    action { |r|
#      p r
#      r[:ast]
#    }
#  }
#}

$r = parser.parse('abcdefgh')

#p $r[:ast][:data][:seq].elements.map {|e| e[:data][:uint]}
# or:
#p $r.ast.data.map(&:data)


# Third demo: a predicate block that tests each uint8 result against 100.
h = Hammer::Parser
small_byte = h.attr_bool(h.uint8) { |r| r.data <= 100 }
parser = h.many(small_byte)
#p parser.parse('abcdefgh').ast.data.map(&:data)
|
@@ -0,0 +1,346 @@
|
|
1
|
+
require 'ffi'
|
2
|
+
|
3
|
+
module Hammer
|
4
|
+
module Internal
|
5
|
+
extend FFI::Library
|
6
|
+
|
7
|
+
ffi_lib 'hammer'
|
8
|
+
|
9
|
+
# A dynamically scoped variable whose value is stored per thread in
# Thread.current[...] under a symbol generated for each instance.
#
# NOTE(review): Ruby documents Thread#[] as fiber-local storage; confirm that
# is acceptable if fibers are ever used together with these variables.
class DynamicVariable
  SYMBOL_PREFIX = "Hammer::Internal::DynamicVariable gensym "
  @@current_symbol = 0
  # Guards @@current_symbol so that two instances created concurrently can
  # never end up with the same storage symbol.
  @@symbol_lock = Mutex.new

  # This can take either a default value or a block. If a default value is
  # given, all threads' dynvars are initialized to that object. If a block is
  # given, the block is lazily called on each thread to generate the initial
  # value. If both a block and a default value are passed, the block is
  # called with the literal value.
  #
  # @param default [Object] initial value (or the argument handed to the block)
  # @param name [Object] currently unused; kept so existing callers keep working
  # @param block [Proc] optional initializer, called with +default+ on first read
  def initialize(default=nil, name=nil, &block)
    @default = default
    @block = block || Proc.new{|x| x}
    # Increment and read the counter in one critical section. Doing the read
    # in a separate statement lets another thread bump the counter in between,
    # which would give two variables the same symbol (and thus shared storage).
    serial = @@symbol_lock.synchronize { @@current_symbol += 1 }
    @sym = (SYMBOL_PREFIX + serial.to_s).to_sym
  end

  # Returns the value for the current thread, computing and storing the
  # initial value on first access.
  def value
    return Thread.current[@sym] if Thread.current.key?(@sym)
    Thread.current[@sym] = @block.call(@default)
  end

  # Sets the value for the current thread only.
  def value=(new_value)
    Thread.current[@sym] = new_value
  end

  # Runs the block with the variable temporarily set to +new_value+ and
  # restores the previous value afterwards, even if the block raises.
  #
  # @return [Object] whatever the block returns
  def with(new_value, &block)
    old_value = value
    begin
      self.value = new_value
      return block.call
    ensure
      self.value = old_value
    end
  end
end
|
48
|
+
|
49
|
+
# Maybe we can implement Hammer::Parser with FFI::DataConverter.
|
50
|
+
# That way, most hammer functions won't need to be wrapped.
|
51
|
+
# (Probably need to wrap token, sequence and choice only).
|
52
|
+
# See http://www.elabs.se/blog/61-advanced-topics-in-ruby-ffi
|
53
|
+
typedef :pointer, :h_parser
|
54
|
+
|
55
|
+
# FFI data converter between token type numbers (native ints) and Ruby
# symbols. It also acts as a registry: each known token type name maps to a
# number, and to a proc that extracts a Ruby value from a parsed token.
class HTokenType
  extend FFI::DataConverter

  # name (Symbol) => token type number
  @@known_type_map = {
    :none => 1,
    :bytes => 2,
    :sint => 4,
    :uint => 8,
    :sequence => 16,
  }

  # token type number => name (Symbol)
  @@inverse_type_map = @@known_type_map.invert

  # name (Symbol) => proc taking a parsed token and returning its Ruby value.
  # Read by HParsedToken#unmarshal via class_variable_get, so the variable
  # name must not change.
  @@from_hpt = {
    :none => Proc.new { nil },
    :bytes => Proc.new {|hpt| hpt[:data][:bytes].token},
    :sint => Proc.new {|hpt| hpt[:data][:sint]},
    :uint => Proc.new {|hpt| hpt[:data][:uint]},
    :sequence => Proc.new {|hpt| hpt[:data][:seq].map {|x| x.unmarshal}},
  }

  # Registers a new token type. Note that this overrides +new+: it does not
  # create an instance, it allocates a token type number through
  # h_allocate_token_type and records +block+ as the unmarshaller.
  #
  # @param name [Symbol, String] token type name
  # @param block [Proc] called with the parsed token when unmarshalling
  # @return [Proc, nil] the block that was stored
  def self.new(name, &block)
    # Symbol#to_sym and String#to_s return the receiver, so one code path
    # serves both kinds of name.
    name_sym = name.to_sym
    name_str = name.to_s
    num = Hammer::Internal.h_allocate_token_type(name_str)
    @@known_type_map[name_sym] = num
    @@inverse_type_map[num] = name_sym
    @@from_hpt[name_sym] = block
  end

  # Looks up the number of a token type by name, asking the library via
  # h_get_token_type_number for names that are not cached yet.
  #
  # @param name [Symbol, String] token type name
  # @return [Integer] token type number
  # @raise [ArgumentError] if the library reports a number <= 0 for the name
  def self.from_name(name)
    # The caches are keyed by Symbol. Normalize String names so that they hit
    # the same entries instead of bypassing the cache and storing String keys
    # that from_num would later hand back in place of Symbols.
    key = name.respond_to?(:to_sym) ? name.to_sym : name
    unless @@known_type_map.key? key
      num = Hammer::Internal.h_get_token_type_number(name.to_s)
      if num <= 0
        raise ArgumentError, "Unknown token type #{name}"
      end
      @@known_type_map[key] = num
      @@inverse_type_map[num] = key
    end
    return @@known_type_map[key]
  end

  # Looks up the name of a token type by number, asking the library via
  # h_get_token_type_name for numbers that are not cached yet.
  #
  # @param num [Integer] token type number
  # @return [Symbol, nil] the name, or nil if the library returns no name
  def self.from_num(num)
    unless @@inverse_type_map.key? num
      name = Hammer::Internal.h_get_token_type_name(num)
      return nil if name.nil?
      name = name.to_sym
      @@known_type_map[name] = num
      @@inverse_type_map[num] = name
    end
    return @@inverse_type_map[num]
  end

  # FFI::DataConverter hook: the native representation is an int.
  def self.native_type
    FFI::Type::INT
  end

  # FFI::DataConverter hook: Integers pass through unchanged, anything else
  # is treated as a token type name.
  def self.to_native(val, ctx)
    return val if val.is_a?(Integer)
    return from_name(val)
  end

  # FFI::DataConverter hook: returns the name for a known number, or the raw
  # number when no name is known.
  def self.from_native(val, ctx)
    return from_num(val) || val
  end
end
|
128
|
+
|
129
|
+
# The token type registry functions are bound as early as possible so that
# code further down can use them without fear.
attach_function(:h_allocate_token_type, [:string], :int)
attach_function(:h_get_token_type_number, [:string], :int)
attach_function(:h_get_token_type_name, [:int], :string)
|
134
|
+
|
135
|
+
# Struct wrapper exposing the used entries of a counted array as
# HParsedToken objects.
class HCountedArray < FFI::Struct
  layout :capacity, :size_t,
         :used, :size_t,
         :arena, :pointer,
         :elements, :pointer # HParsedToken**

  # Number of entries in use (the :used field).
  def length
    self[:used]
  end

  # Wraps each of the first :used pointers stored at :elements in an
  # HParsedToken and returns them as an Array.
  def elements
    slots = FFI::Pointer.new(:pointer, self[:elements])
    return self[:used].times.map do |slot|
      HParsedToken.new(slots[slot].read_pointer)
    end
  end

  #def [](idx)
  #  raise ArgumentError, "Index out of range" unless idx >= 0 and idx < length
  #  elem_array = FFI::Pointer.new(:pointer, self[:elements])
  #  return HParsedToken.new(elem_array[i].read_pointer)
  #end

  # Calls +code+ with every element and returns the collected results.
  def map(&code)
    elements.map do |token|
      code.call(token)
    end
  end

  # Calls +code+ with every element.
  def each(&code)
    elements.each do |token|
      code.call(token)
    end
  end
end
|
163
|
+
|
164
|
+
# Struct wrapper for a byte buffer: a pointer plus a length.
class HBytes < FFI::Struct
  layout :token, :pointer, # uint8_t*
         :len, :size_t

  # Reads :len bytes from the :token pointer and returns them as a String.
  def token
    # TODO: Encoding?
    # Should be the same encoding as the string the token was created with.
    # But how do we get to this knowledge at this point?
    # Cheap solution: Just ask the user (additional parameter with default value of UTF-8).
    byte_count = self[:len]
    buffer = self[:token]
    buffer.read_string(byte_count)
  end

  # Length of the buffer (the :len field).
  # TODO: Probably should rename this to match ruby conventions: length, count, size
  def len
    self[:len]
  end
end
|
181
|
+
|
182
|
+
# Struct pairing a byte buffer with an :encoding field that holds the Ruby
# object id of the encoding to apply.
class HString < FFI::Struct
  layout :content, HBytes.by_ref,
         :encoding, :uint64

  # Returns the content bytes as a String tagged with the stored encoding.
  # NOTE(review): ObjectSpace._id2ref only works while the referenced object
  # is still alive - confirm the encoding object is kept reachable elsewhere.
  def token
    raw = self[:content].token
    encoding = ObjectSpace._id2ref(self[:encoding])
    return raw.force_encoding(encoding)
  end
end
|
190
|
+
|
191
|
+
HTokenType.new(:"com.upstandinghackers.hammer.ruby.encodedStr") {|hpt|
|
192
|
+
hpt.user(HString).token
|
193
|
+
}
|
194
|
+
HTokenType.new(:"com.upstandinghackers.hammer.ruby.object") {|hpt|
|
195
|
+
ObjectSpace._id2ref(hpt[:data][:uint])
|
196
|
+
}
|
197
|
+
|
198
|
+
# Union of the possible payloads of a parsed token. Which member is
# meaningful depends on the token type stored next to it in HParsedToken.
class HParsedTokenDataUnion < FFI::Union
  layout(
    :bytes, HBytes.by_value,
    :sint,  :int64,
    :uint,  :uint64,
    :dbl,   :double,
    :flt,   :float,
    :seq,   HCountedArray.by_ref,
    :user,  :pointer
  )
end
|
207
|
+
|
208
|
+
# Struct wrapper for a single parsed token: a token type, a payload union and
# position information.
class HParsedToken < FFI::Struct
  layout :token_type, HTokenType,
         :data, HParsedTokenDataUnion.by_value,
         :index, :size_t,
         :bit_offset, :char

  # Returns nil when this struct wraps a null pointer, otherwise self.
  def normalize
    null? ? nil : self
  end

  # The :token_type field, as converted by HTokenType.
  def token_type
    self[:token_type]
  end

  # Returns the payload matching the token type, or nil for any other type.
  # TODO: Is this name ok?
  def data
    payload = self[:data]
    case token_type
    when :bytes    then payload[:bytes].token
    when :sint     then payload[:sint]
    when :uint     then payload[:uint]
    when :sequence then payload[:seq].elements
    when :user     then payload[:user]
    end
  end

  # The raw HBytes payload; only valid for :bytes tokens.
  def bytes
    if token_type != :bytes
      raise ArgumentError, 'wrong token type'
    end
    self[:data][:bytes]
  end

  # The raw HCountedArray payload; only valid for :sequence tokens.
  def seq
    if token_type != :sequence
      raise ArgumentError, 'wrong token type'
    end
    self[:data][:seq]
  end

  # The :index field.
  def index
    self[:index]
  end

  # The :bit_offset field.
  def bit_offset
    self[:bit_offset]
  end

  # Interprets the :user pointer as a reference to the given struct class.
  def user(struct)
    user_ptr = self[:data][:user]
    struct.by_ref.from_native(user_ptr, nil)
  end

  # Converts this token into a Ruby value using the proc registered for its
  # token type in HTokenType's @@from_hpt table.
  def unmarshal
    converters = Hammer::Internal::HTokenType.class_variable_get(:@@from_hpt)
    converters[token_type].call self
  end
end
|
259
|
+
|
260
|
+
# Struct wrapper for the result of a parse: the AST root, the bit length and
# the arena pointer.
class HParseResult < FFI::Struct
  layout :ast, HParsedToken.by_ref,
         :bit_length, :long_long,
         :arena, :pointer

  # The root token, or nil when the :ast pointer is null.
  def ast
    root = self[:ast]
    root.normalize
  end

  # The :bit_length field.
  def bit_length
    self[:bit_length]
  end

  # Frees a non-null parse result through h_parse_result_free. h_parse is
  # attached with HParseResult.auto_ptr as its return type, which relies on
  # this method.
  # NOTE(review): h_parse_result_free is declared as taking
  # HParseResult.by_ref - confirm FFI accepts the raw pointer passed here.
  def self.release(ptr)
    return if ptr.null?
    Hammer::Internal.h_parse_result_free(ptr)
  end

  # Allocates a struct of the given type from this result's arena.
  def arena_alloc(type)
    arena = self[:arena]
    Hammer::Internal.arena_alloc(arena, type)
  end
end
|
281
|
+
|
282
|
+
# Allocates type.size bytes from +arena+ with h_arena_malloc and wraps the
# resulting pointer as a by-reference instance of +type+.
def self.arena_alloc(arena, type)
  type.by_ref.from_native(h_arena_malloc(arena, type.size), nil)
end
|
286
|
+
|
287
|
+
# Bindings for the hammer C functions. Runs of functions that share one
# signature are attached in a loop; the attachment order is unchanged.

# run a parser
attach_function(:h_parse, [:h_parser, :pointer, :size_t], HParseResult.auto_ptr) # TODO: Use :buffer_in instead of :string?

# build a parser
attach_function(:h_token, [:buffer_in, :size_t], :h_parser)
attach_function(:h_ch, [:uint8], :h_parser)
attach_function(:h_ch_range, [:uint8, :uint8], :h_parser)
attach_function(:h_int_range, [:h_parser, :int64, :int64], :h_parser)
attach_function(:h_bits, [:size_t, :bool], :h_parser)
[:h_int64, :h_int32, :h_int16, :h_int8,
 :h_uint64, :h_uint32, :h_uint16, :h_uint8].each do |fn|
  attach_function(fn, [], :h_parser)
end
attach_function(:h_whitespace, [:h_parser], :h_parser)
[:h_left, :h_right].each do |fn|
  attach_function(fn, [:h_parser, :h_parser], :h_parser)
end
attach_function(:h_middle, [:h_parser, :h_parser, :h_parser], :h_parser)
[:h_in, :h_not_in].each do |fn|
  attach_function(fn, [:pointer, :size_t], :h_parser)
end
[:h_end_p, :h_nothing_p].each do |fn|
  attach_function(fn, [], :h_parser)
end
[:h_sequence, :h_choice].each do |fn|
  attach_function(fn, [:varargs], :h_parser)
end
[:h_butnot, :h_difference, :h_xor].each do |fn|
  attach_function(fn, [:h_parser, :h_parser], :h_parser)
end
[:h_many, :h_many1].each do |fn|
  attach_function(fn, [:h_parser], :h_parser)
end
attach_function(:h_repeat_n, [:h_parser, :size_t], :h_parser)
[:h_optional, :h_ignore].each do |fn|
  attach_function(fn, [:h_parser], :h_parser)
end
[:h_sepBy, :h_sepBy1].each do |fn|
  attach_function(fn, [:h_parser, :h_parser], :h_parser)
end
attach_function(:h_epsilon_p, [], :h_parser)
attach_function(:h_length_value, [:h_parser, :h_parser], :h_parser)
[:h_and, :h_not].each do |fn|
  attach_function(fn, [:h_parser], :h_parser)
end

attach_function(:h_indirect, [], :h_parser)
attach_function(:h_bind_indirect, [:h_parser, :h_parser], :void)

# Callbacks must be declared before the functions that take them.
callback(:HAction, [HParseResult.by_ref], HParsedToken.by_ref)
attach_function(:h_action, [:h_parser, :HAction], :h_parser)

callback(:HPredicate, [HParseResult.by_ref], :bool)
attach_function(:h_attr_bool, [:h_parser, :HPredicate], :h_parser)

# free the parse result
attach_function(:h_parse_result_free, [HParseResult.by_ref], :void)

# TODO: Does the HParser* need to be freed?

# Add the arena
attach_function(:h_arena_malloc, [:pointer, :size_t], :pointer)
|
345
|
+
end
|
346
|
+
end
|