hammer-parser 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +101 -0
- data/lib/hammer-parser.rb +47 -0
- data/lib/hammer/internal.rb +346 -0
- data/lib/hammer/parser.rb +224 -0
- data/lib/hammer/parser_builder.rb +124 -0
- data/lib/minitest/hamer-parser_plugin.rb +31 -0
- data/test/autogen_test.rb +755 -0
- data/test/parser_test.rb +132 -0
- metadata +83 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b813026f979c544803273ae966bd9f1858e4f510
|
4
|
+
data.tar.gz: b48b3e11a02fc19be47139330a2a14e7c91558f6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d33df5b0e64a59baf7d27ce600edfca7eabf6055fea496b4df5ac148cf036183d88959ac3c3a7d7bc56f23273e132d2409b308cc61177925536a914f7e317b16
|
7
|
+
data.tar.gz: 147863e19142b07d561f44f9ccd6b994d8553c7cd2ef7cf5b6d03b7507ffaa95d0aa76fc63d3d3d438397a599bcee124fbb691478548de7acc7d3765f8a72ca3
|
data/README.md
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
# hammer-parser
|
2
|
+
|
3
|
+
Ruby bindings for [hammer](https://github.com/UpstandingHackers/hammer), a parsing library.
|
4
|
+
|
5
|
+
|
6
|
+
## Notes
|
7
|
+
|
8
|
+
* I called the gem `hammer-parser`, since there already is a [gem named `hammer`](https://rubygems.org/gems/hammer).
|
9
|
+
|
10
|
+
|
11
|
+
## Development
|
12
|
+
|
13
|
+
1. `cd src/bindings/ruby`.
|
14
|
+
|
15
|
+
2. Run `bundle install` to install dependencies.
|
16
|
+
|
17
|
+
3. Run `bundle console` to open `irb` with hammer loaded.
|
18
|
+
|
19
|
+
4. To run tests, just run `bundle exec rake test`.
|
20
|
+
|
21
|
+
|
22
|
+
## Installation
|
23
|
+
|
24
|
+
1. Download the hammer source code, then build and install it system-wide together with the Ruby bindings.
|
25
|
+
|
26
|
+
`git clone https://github.com/UpstandingHackers/hammer`
|
27
|
+
|
28
|
+
`cd hammer`
|
29
|
+
|
30
|
+
`scons bindings=ruby`
|
31
|
+
|
32
|
+
`sudo scons bindings=ruby install`
|
33
|
+
|
34
|
+
2. On Linux, you will also have to run
|
35
|
+
|
36
|
+
`sudo ldconfig`
|
37
|
+
|
38
|
+
3. Build the gem
|
39
|
+
`gem build hammer-parser.gemspec`
|
40
|
+
|
41
|
+
4. Install the gem
|
42
|
+
`gem install hammer-parser-x.x.x.gem`
|
43
|
+
|
44
|
+
|
45
|
+
## Examples
|
46
|
+
|
47
|
+
Add hammer to your Gemfile.
|
48
|
+
|
49
|
+
`gem 'hammer-parser'`
|
50
|
+
|
51
|
+
Use hammer in your project.
|
52
|
+
|
53
|
+
`require 'hammer-parser'`
|
54
|
+
|
55
|
+
### Building a parser
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
parser = Hammer::Parser.build {
|
59
|
+
token 'Hello '
|
60
|
+
choice {
|
61
|
+
token 'Mom'
|
62
|
+
token 'Dad'
|
63
|
+
}
|
64
|
+
token '!'
|
65
|
+
}
|
66
|
+
```
|
67
|
+
|
68
|
+
Also possible:
|
69
|
+
|
70
|
+
```ruby
|
71
|
+
parser = Hammer::ParserBuilder.new
|
72
|
+
.token('Hello ')
|
73
|
+
.choice(Hammer::Parser.token('Mom'), Hammer::Parser.token('Dad'))
|
74
|
+
.token('!')
|
75
|
+
.build
|
76
|
+
```
|
77
|
+
|
78
|
+
More like hammer in C:
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
h = Hammer::Parser
|
82
|
+
parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), h.token('!'))
|
83
|
+
```
|
84
|
+
|
85
|
+
### Parsing
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
result = parser.parse 'Hello Mom!'
|
89
|
+
=> #<HParseResult>
|
90
|
+
result = parser.parse 'Hello Someone!'
|
91
|
+
=> nil
|
92
|
+
```
|
93
|
+
|
94
|
+
The `parse` method returns an `HParseResult` object, which needs to be
|
95
|
+
kept around until you're entirely done with the parse tree, which can
|
96
|
+
be accessed with `result.ast`.
|
97
|
+
|
98
|
+
While the AST can be accessed using the same interface as the C
|
99
|
+
HParsedToken type, we recommend using `result.ast.unmarshal` instead.
|
100
|
+
This converts the entire parse tree into a standalone Ruby-native
|
101
|
+
data structure, which will likely be much easier to work with.
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'hammer/internal'
|
2
|
+
require 'hammer/parser'
|
3
|
+
require 'hammer/parser_builder'
|
4
|
+
|
5
|
+
# Scratch code kept on purpose so that HParseResult can be explored from irb.
# Everything below runs when this file is loaded; the most recent parse result
# is left in the global $r.

# Experiment 1: token 'abc', then an indirect parser, then end_p. The indirect
# parser is created inside the builder block and bound to token 'abd' afterwards.
deferred = nil
parser = Hammer::Parser.build do
  token 'abc'
  deferred = indirect
  end_p
end
deferred.bind(Hammer::Parser.token('abd'))

$r = parser.parse('abcabd')
# Inspect with:
#   p $r[:ast][:data][:seq].elements.map { |e| e[:data][:bytes].token }

# Experiment 2: many uint8 values, each passed through an action that doubles
# the parsed value.
h = Hammer::Parser
doubling = h.action(h.uint8) do |r|
  r.data * 2
end
parser = h.many(doubling)

$r = parser.parse('abcdefgh')
# Inspect with either of:
#   p $r[:ast][:data][:seq].elements.map { |e| e[:data][:uint] }
#   p $r.ast.data.map(&:data)

# Experiment 3: many uint8 values wrapped in an attr_bool predicate
# (value <= 100). The parser is built but not run here.
h = Hammer::Parser
parser = h.many(h.attr_bool(h.uint8) { |r| r.data <= 100 })
# Inspect with:
#   p parser.parse('abcdefgh').ast.data.map(&:data)
|
@@ -0,0 +1,346 @@
|
|
1
|
+
require 'ffi'
|
2
|
+
|
3
|
+
module Hammer
|
4
|
+
module Internal
|
5
|
+
extend FFI::Library
|
6
|
+
|
7
|
+
ffi_lib 'hammer'
|
8
|
+
|
9
|
+
# A dynamically scoped variable. Each instance owns a unique generated symbol
# and keeps its current value in Thread.current under that symbol, so every
# thread sees (and lazily initialises) its own value.
#
# Note: Thread#[] storage is fiber-local in Ruby, so a new Fiber also starts
# from the initial value.
class DynamicVariable
  SYMBOL_PREFIX = "Hammer::Internal::DynamicVariable gensym "
  @@current_symbol = 0
  # Guards @@current_symbol: incrementing it is a read-modify-write on shared
  # state, and two instances that end up with the same symbol would silently
  # share their storage.
  @@symbol_lock = Mutex.new

  # default - initial value handed to the initialiser block.
  # name    - accepted for interface compatibility; currently unused.
  # block   - optional initialiser, called lazily on first read in each
  #           thread with +default+ as its argument. Without a block the
  #           default itself is the initial value.
  def initialize(default=nil, name=nil, &block)
    @default = default
    @block = block || Proc.new{|x| x}
    @sym = @@symbol_lock.synchronize do
      @@current_symbol += 1
      (SYMBOL_PREFIX + @@current_symbol.to_s).to_sym
    end
  end

  # Returns the current value for the calling thread, computing and storing
  # the initial value on first access.
  def value
    return Thread.current[@sym] if Thread.current.key?(@sym)

    Thread.current[@sym] = @block.call(@default)
  end

  # Sets the value for the calling thread only.
  def value=(new_value)
    Thread.current[@sym] = new_value
  end

  # Runs +block+ with the variable temporarily set to +new_value+ and returns
  # the block's result. The previous value is restored even if the block
  # raises.
  def with(new_value, &block)
    old_value = value
    begin
      self.value = new_value
      return block.call
    ensure
      self.value = old_value
    end
  end
end
|
48
|
+
|
49
|
+
# Maybe we can implement Hammer::Parser with FFI::DataConverter.
|
50
|
+
# That way, most hammer functions won't need to be wrapped.
|
51
|
+
# (Probably need to wrap token, sequence and choice only).
|
52
|
+
# See http://www.elabs.se/blog/61-advanced-topics-in-ruby-ffi
|
53
|
+
typedef :pointer, :h_parser
|
54
|
+
|
55
|
+
# FFI data converter for hammer token types: integers on the native side,
# Symbols on the Ruby side. It also keeps the table of procs used to turn a
# parsed token of a given type into a plain Ruby value.
class HTokenType
  extend FFI::DataConverter

  # Built-in token types and their integer codes.
  # NOTE(review): presumably these mirror the token type enum of the C
  # library - confirm against the hammer headers.
  @@known_type_map = {
    :none => 1,
    :bytes => 2,
    :sint => 4,
    :uint => 8,
    :sequence => 16,
  }

  @@inverse_type_map = @@known_type_map.invert

  # Per-type converters from an HParsedToken to a Ruby value.
  @@from_hpt = {
    :none => Proc.new { nil },
    :bytes => Proc.new {|hpt| hpt[:data][:bytes].token},
    :sint => Proc.new {|hpt| hpt[:data][:sint]},
    :uint => Proc.new {|hpt| hpt[:data][:uint]},
    :sequence => Proc.new {|hpt| hpt[:data][:seq].map {|x| x.unmarshal}},
  }

  # Registers a new token type instead of instantiating the class.
  #
  # name  - Symbol or String name passed to h_allocate_token_type.
  # block - converter called with the HParsedToken when a token of this type
  #         is unmarshalled.
  #
  # Returns the block.
  def self.new(name, &block)
    name_str = name.to_s
    name_sym = name.to_sym
    num = Hammer::Internal.h_allocate_token_type(name_str)
    @@known_type_map[name_sym] = num
    @@inverse_type_map[num] = name_sym
    @@from_hpt[name_sym] = block
  end

  # Returns the integer code for a token type name.
  #
  # The name is normalised to a Symbol first, so Strings and Symbols share
  # one cache entry and the inverse map only ever holds Symbols (the
  # converter table is keyed by Symbol). Names not known locally are looked
  # up with h_get_token_type_number and cached.
  #
  # Raises ArgumentError if the library reports no such type (result <= 0).
  def self.from_name(name)
    key = name.to_s.to_sym
    unless @@known_type_map.key? key
      num = Hammer::Internal.h_get_token_type_number(key.to_s)
      if num <= 0
        raise ArgumentError, "Unknown token type #{name}"
      end
      @@known_type_map[key] = num
      @@inverse_type_map[num] = key
    end
    return @@known_type_map[key]
  end

  # Returns the Symbol name for an integer code, asking
  # h_get_token_type_name for codes not seen before. Returns nil when the
  # library has no name for the code.
  def self.from_num(num)
    unless @@inverse_type_map.key? num
      name = Hammer::Internal.h_get_token_type_name(num)
      if name.nil?
        return nil
      end
      name = name.to_sym
      @@known_type_map[name] = num
      @@inverse_type_map[num] = name
    end
    return @@inverse_type_map[num]
  end

  # FFI::DataConverter hook: token types travel as C ints.
  def self.native_type
    FFI::Type::INT
  end

  # FFI::DataConverter hook: Integers pass through unchanged, anything else
  # is treated as a type name.
  def self.to_native(val, ctx)
    return val if val.is_a?(Integer)
    return from_name(val)
  end

  # FFI::DataConverter hook: returns the Symbol name, or the raw integer if
  # the code has no known name.
  def self.from_native(val, ctx)
    return from_num(val) || val
  end
end
|
128
|
+
|
129
|
+
# Token type registry functions. They are attached ahead of everything else
# because HTokenType calls them, including while the rest of this module is
# still being defined.
attach_function(:h_allocate_token_type, [:string], :int)
attach_function(:h_get_token_type_number, [:string], :int)
attach_function(:h_get_token_type_name, [:int], :string)
|
134
|
+
|
135
|
+
# Ruby view of a native counted array whose :elements field points at an
# array of HParsedToken pointers; :used is the number of valid entries.
class HCountedArray < FFI::Struct
  layout :capacity, :size_t,
         :used, :size_t,
         :arena, :pointer,
         :elements, :pointer # HParsedToken**

  # Number of elements currently in use.
  def length
    self[:used]
  end

  # Returns a new Array of HParsedToken structs, one per used slot.
  def elements
    elem_array = FFI::Pointer.new(:pointer, self[:elements])
    (0...self[:used]).map { |i| HParsedToken.new(elem_array[i].read_pointer) }
  end

  # Indexed access sketch, left disabled.
  # NOTE(review): presumably disabled because defining [] here would shadow
  # FFI::Struct#[], which the methods above use for field access - confirm.
  #def [](idx)
  #  raise ArgumentError, "Index out of range" unless idx >= 0 and idx < length
  #  elem_array = FFI::Pointer.new(:pointer, self[:elements])
  #  return HParsedToken.new(elem_array[idx].read_pointer)
  #end

  # Maps over the elements. The block is forwarded as-is, so calling this
  # without a block returns an Enumerator instead of failing on a nil block.
  def map(&code)
    elements.map(&code)
  end

  # Iterates over the elements; returns an Enumerator when no block is given.
  def each(&code)
    elements.each(&code)
  end
end
|
163
|
+
|
164
|
+
# A pointer to raw bytes plus the number of bytes.
class HBytes < FFI::Struct
  layout :token, :pointer, # uint8_t*
         :len, :size_t

  # Reads :len bytes from the :token pointer and returns them as a String.
  #
  # TODO: Encoding? Ideally the result would carry the encoding of the string
  # the token was created with, but that is not known here. A cheap option
  # would be an extra parameter defaulting to UTF-8.
  def token
    bytes_ptr = self[:token]
    bytes_ptr.read_string(self[:len])
  end

  # Byte count.
  # TODO: Probably rename to match Ruby conventions (length, count, size).
  def len
    self[:len]
  end
end
|
181
|
+
|
182
|
+
# Bytes together with the object id of the encoding to apply to them.
class HString < FFI::Struct
  layout :content, HBytes.by_ref,
         :encoding, :uint64

  # Returns the content bytes as a String, tagged with the object that
  # ObjectSpace._id2ref resolves from the stored :encoding id.
  def token
    raw = self[:content].token
    raw.force_encoding(ObjectSpace._id2ref(self[:encoding]))
  end
end
|
190
|
+
|
191
|
+
# Token type for encoded strings: the user data is read as an HString and
# returned as a Ruby String.
HTokenType.new(:"com.upstandinghackers.hammer.ruby.encodedStr") do |hpt|
  hpt.user(HString).token
end

# Token type for arbitrary Ruby objects: the uint payload is an object id
# that is resolved back to the object.
HTokenType.new(:"com.upstandinghackers.hammer.ruby.object") do |hpt|
  ObjectSpace._id2ref(hpt[:data][:uint])
end
|
197
|
+
|
198
|
+
# Payload of a parsed token; which member is meaningful depends on the
# token's type.
class HParsedTokenDataUnion < FFI::Union
  layout(
    :bytes, HBytes.by_value,
    :sint,  :int64,
    :uint,  :uint64,
    :dbl,   :double,
    :flt,   :float,
    :seq,   HCountedArray.by_ref,
    :user,  :pointer
  )
end
|
207
|
+
|
208
|
+
# One node of a parse tree: a token type, its payload, and the position at
# which it was found.
class HParsedToken < FFI::Struct
  layout :token_type, HTokenType,
         :data, HParsedTokenDataUnion.by_value,
         :index, :size_t,
         :bit_offset, :char

  # Returns nil when this struct wraps a null pointer, otherwise self.
  def normalize
    null? ? nil : self
  end

  # Token type as converted by HTokenType (a Symbol, or the raw integer for
  # codes without a known name).
  def token_type
    self[:token_type]
  end

  # Returns the payload for the built-in token types: a String for :bytes,
  # an Integer for :sint / :uint, an Array of HParsedToken for :sequence and
  # the raw pointer for :user. Returns nil for any other type.
  # TODO: Is this name ok?
  def data
    payload = self[:data]
    case token_type
    when :bytes    then payload[:bytes].token
    when :sint     then payload[:sint]
    when :uint     then payload[:uint]
    when :sequence then payload[:seq].elements
    when :user     then payload[:user]
    end
  end

  # Returns the HBytes payload. Raises ArgumentError for non-:bytes tokens.
  def bytes
    raise ArgumentError, 'wrong token type' unless token_type == :bytes
    self[:data][:bytes]
  end

  # Returns the HCountedArray payload. Raises ArgumentError for
  # non-:sequence tokens.
  def seq
    raise ArgumentError, 'wrong token type' unless token_type == :sequence
    self[:data][:seq]
  end

  def index
    self[:index]
  end

  def bit_offset
    self[:bit_offset]
  end

  # Interprets the :user pointer as a reference to the given FFI struct class.
  def user(struct)
    struct.by_ref.from_native(self[:data][:user], nil)
  end

  # Converts this token (recursively, for sequences) into a plain Ruby value
  # using the converter registered for its token type in HTokenType.
  #
  # Raises ArgumentError if no converter is registered for the type, instead
  # of failing with a NoMethodError on nil.
  def unmarshal
    type = token_type
    converter = Hammer::Internal::HTokenType.class_variable_get(:@@from_hpt)[type]
    if converter.nil?
      raise ArgumentError, "no unmarshaller registered for token type #{type.inspect}"
    end
    converter.call self
  end
end
|
259
|
+
|
260
|
+
# Result of running a parser: the root of the parse tree, the number of bits
# consumed, and the arena pointer stored alongside them.
class HParseResult < FFI::Struct
  layout :ast, HParsedToken.by_ref,
         :bit_length, :long_long,
         :arena, :pointer

  # Root token of the parse tree, or nil when the root pointer is null.
  def ast
    root = self[:ast]
    root.normalize
  end

  def bit_length
    self[:bit_length]
  end

  # Frees the native parse result unless the pointer is null.
  # NOTE(review): presumably the hook used for the auto_ptr return type of
  # h_parse - confirm against the FFI documentation.
  def self.release(ptr)
    return if ptr.null?

    Hammer::Internal.h_parse_result_free(ptr)
  end

  # Allocates an instance of the given FFI struct class from this result's
  # arena.
  def arena_alloc(type)
    owning_arena = self[:arena]
    Hammer::Internal.arena_alloc(owning_arena, type)
  end
end
|
281
|
+
|
282
|
+
# Allocates type.size bytes from the given arena with h_arena_malloc and
# returns the memory wrapped as an instance of the struct class +type+.
def self.arena_alloc(arena, type)
  raw_memory = h_arena_malloc(arena, type.size)
  type.by_ref.from_native(raw_memory, nil)
end
|
286
|
+
|
287
|
+
# Run a parser over an input buffer of the given length.
# TODO: Use :buffer_in instead of :pointer for the input?
attach_function :h_parse, [:h_parser, :pointer, :size_t], HParseResult.auto_ptr

# --- Building parsers -------------------------------------------------------

# Parsers with their own argument lists.
attach_function :h_token, [:buffer_in, :size_t], :h_parser
attach_function :h_ch, [:uint8], :h_parser
attach_function :h_ch_range, [:uint8, :uint8], :h_parser
attach_function :h_int_range, [:h_parser, :int64, :int64], :h_parser
attach_function :h_bits, [:size_t, :bool], :h_parser
attach_function :h_middle, [:h_parser, :h_parser, :h_parser], :h_parser
attach_function :h_repeat_n, [:h_parser, :size_t], :h_parser

# Parsers that take no arguments.
[
  :h_int64, :h_int32, :h_int16, :h_int8,
  :h_uint64, :h_uint32, :h_uint16, :h_uint8,
  :h_end_p, :h_nothing_p, :h_epsilon_p,
  :h_indirect,
].each do |fn|
  attach_function fn, [], :h_parser
end

# Combinators wrapping a single parser.
[
  :h_whitespace, :h_many, :h_many1, :h_optional, :h_ignore, :h_and, :h_not,
].each do |fn|
  attach_function fn, [:h_parser], :h_parser
end

# Combinators taking two parsers.
[
  :h_left, :h_right, :h_butnot, :h_difference, :h_xor,
  :h_sepBy, :h_sepBy1, :h_length_value,
].each do |fn|
  attach_function fn, [:h_parser, :h_parser], :h_parser
end

# Character-set parsers: a buffer and its length.
[:h_in, :h_not_in].each do |fn|
  attach_function fn, [:pointer, :size_t], :h_parser
end

# Variadic combinators.
[:h_sequence, :h_choice].each do |fn|
  attach_function fn, [:varargs], :h_parser
end

# Binds a parser created with h_indirect to its real definition.
attach_function :h_bind_indirect, [:h_parser, :h_parser], :void

# Semantic actions: a callback from parse result to parsed token.
callback :HAction, [HParseResult.by_ref], HParsedToken.by_ref
attach_function :h_action, [:h_parser, :HAction], :h_parser

# Predicates: a callback from parse result to bool.
callback :HPredicate, [HParseResult.by_ref], :bool
attach_function :h_attr_bool, [:h_parser, :HPredicate], :h_parser

# --- Memory management ------------------------------------------------------

# Free a parse result.
attach_function :h_parse_result_free, [HParseResult.by_ref], :void

# TODO: Does the HParser* need to be freed?

# Allocate from an arena.
attach_function :h_arena_malloc, [:pointer, :size_t], :pointer
|
345
|
+
end
|
346
|
+
end
|