hammer-parser 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b813026f979c544803273ae966bd9f1858e4f510
4
+ data.tar.gz: b48b3e11a02fc19be47139330a2a14e7c91558f6
5
+ SHA512:
6
+ metadata.gz: d33df5b0e64a59baf7d27ce600edfca7eabf6055fea496b4df5ac148cf036183d88959ac3c3a7d7bc56f23273e132d2409b308cc61177925536a914f7e317b16
7
+ data.tar.gz: 147863e19142b07d561f44f9ccd6b994d8553c7cd2ef7cf5b6d03b7507ffaa95d0aa76fc63d3d3d438397a599bcee124fbb691478548de7acc7d3765f8a72ca3
@@ -0,0 +1,101 @@
1
+ # hammer-parser
2
+
3
+ Ruby bindings for [hammer](https://github.com/UpstandingHackers/hammer), a parsing library.
4
+
5
+
6
+ ## Notes
7
+
8
+ * The gem is called `hammer-parser` because a [gem named `hammer`](https://rubygems.org/gems/hammer) already exists.
9
+
10
+
11
+ ## Development
12
+
13
+ 1. `cd src/bindings/ruby`.
14
+
15
+ 2. Run `bundle install` to install dependencies.
16
+
17
+ 3. Run `bundle console` to open `irb` with hammer loaded.
18
+
19
+ 4. To run tests, just run `bundle exec rake test`.
20
+
21
+
22
+ ## Installation
23
+
24
+ 1. Download the hammer source code, and make it available system wide with the bindings.
25
+
26
+ `git clone https://github.com/UpstandingHackers/hammer`
27
+
28
+ `cd hammer`
29
+
30
+ `scons bindings=ruby`
31
+
32
+ `sudo scons bindings=ruby install`
33
+
34
+ 2. On Linux, you will also have to run
35
+
36
+ `sudo ldconfig`
37
+
38
+ 3. Build the gem
39
+ `gem build hammer-parser.gemspec`
40
+
41
+ 4. Install the gem
42
+ `gem install hammer-parser-x.x.x.gem`
43
+
44
+
45
+ ## Examples
46
+
47
+ Add hammer to your Gemfile.
48
+
49
+ `gem 'hammer-parser'`
50
+
51
+ Use hammer in your project.
52
+
53
+ `require 'hammer-parser'`
54
+
55
+ ### Building a parser
56
+
57
+ ```ruby
58
+ parser = Hammer::Parser.build {
59
+ token 'Hello '
60
+ choice {
61
+ token 'Mom'
62
+ token 'Dad'
63
+ }
64
+ token '!'
65
+ }
66
+ ```
67
+
68
+ Also possible:
69
+
70
+ ```ruby
71
+ parser = Hammer::ParserBuilder.new
72
+ .token('Hello ')
73
+ .choice(Hammer::Parser.token('Mom'), Hammer::Parser.token('Dad'))
74
+ .token('!')
75
+ .build
76
+ ```
77
+
78
+ More like hammer in C:
79
+
80
+ ```ruby
81
+ h = Hammer::Parser
82
+ parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), h.token('!'))
83
+ ```
84
+
85
+ ### Parsing
86
+
87
+ ```ruby
88
+ result = parser.parse 'Hello Mom!'
89
+ # => #<HParseResult>
90
+ result = parser.parse 'Hello Someone!'
91
+ # => nil
92
+ ```
93
+
94
+ The `parse` method returns an `HParseResult` object, which needs to be
95
+ kept around until you're entirely done with the parse tree, which can
96
+ be accessed with `result.ast`.
97
+
98
+ While the AST can be accessed using the same interface as the C
99
+ HParsedToken type, we recommend using `result.ast.unmarshal` instead.
100
+ This converts the entire parse tree into a standalone Ruby-native
101
+ data structure, which will likely be much easier to work with.
@@ -0,0 +1,47 @@
1
+ require 'hammer/internal'
2
+ require 'hammer/parser'
3
+ require 'hammer/parser_builder'
4
+
5
# Playground code that runs when this file is loaded.
# Leave this in for now to be able to play around with HParseResult in irb.
#
# The results of the first two experiments are stored in the global $r so
# they can be inspected from an interactive session.

# Experiment 1: a parser built with the block DSL that contains an indirect
# parser, which is bound to a concrete token parser after the build.
x = nil
parser = Hammer::Parser.build {
  token 'abc'
  x = indirect # captured so it can be bound below
  end_p
}
x.bind(Hammer::Parser.token('abd'))

#$p = parser
$r = parser.parse 'abcabd'

#p $r[:ast][:data][:seq].elements.map {|e| e[:data][:bytes].token }


# Experiment 2: `many` over `uint8`, with an action block attached to each
# uint8 parser.
h = Hammer::Parser
parser =
  h.many(
    h.action(h.uint8) { |r|
      #p "TT=#{r[:ast][:token_type]}, value=#{r[:ast][:data][:uint]}"
      r.data * 2
    })

#parser = Hammer::Parser.build {
#  many {
#    uint8
#    action { |r|
#      p r
#      r[:ast]
#    }
#  }
#}

$r = parser.parse 'abcdefgh'

#p $r[:ast][:data][:seq].elements.map {|e| e[:data][:uint]}
# or:
#p $r.ast.data.map(&:data)


# Experiment 3: `many` over `uint8` guarded by a boolean predicate block.
# The parser is only constructed here; the parse call is left commented out.
h = Hammer::Parser
parser = h.many(h.attr_bool(h.uint8) { |r| r.data <= 100 })
#p parser.parse('abcdefgh').ast.data.map(&:data)
@@ -0,0 +1,346 @@
1
+ require 'ffi'
2
+
3
+ module Hammer
4
+ module Internal
5
+ extend FFI::Library
6
+
7
+ ffi_lib 'hammer'
8
+
9
# A variable whose value is looked up through Thread.current, so each
# execution context sees (and may temporarily rebind) its own value.
#
# NOTE: the storage used is Thread.current[], which Ruby scopes to the
# current fiber of the thread rather than to the thread as a whole.
class DynamicVariable
  SYMBOL_PREFIX = "Hammer::Internal::DynamicVariable gensym "
  @@current_symbol = 0

  # This can take either a default value or a block. If a default value
  # is given, every context's variable is initialized to that object. If
  # a block is given, the block is lazily called in each context to
  # generate the initial value. If both a block and a default value are
  # passed, the block is called with the default value.
  #
  # @param default [Object] initial value, or the argument handed to the block
  # @param name [Object] accepted for interface compatibility; currently unused
  def initialize(default=nil, name=nil, &block)
    @default = default
    # Without a block, fall back to the identity so `value` yields `default`.
    @block = block || Proc.new { |x| x }
    # Every instance gets its own unique storage key.
    @@current_symbol += 1
    @sym = (SYMBOL_PREFIX + @@current_symbol.to_s).to_sym
  end

  # Returns the current value, computing and storing the initial value on
  # first access in this context.
  def value
    return Thread.current[@sym] if Thread.current.key?(@sym)

    Thread.current[@sym] = @block.call(@default)
  end

  # Rebinds the variable for the current context.
  def value=(new_value)
    Thread.current[@sym] = new_value
  end

  # Runs the block with the variable bound to +new_value+ and restores the
  # previous value afterwards, even if the block raises.
  #
  # @return [Object] whatever the block returns
  def with(new_value, &block)
    old_value = value
    begin
      self.value = new_value
      return block.call
    ensure
      self.value = old_value
    end
  end
end
48
+
49
# Parsers are passed to and from the native library as plain pointers,
# under the alias :h_parser.
#
# Maybe we can implement Hammer::Parser with FFI::DataConverter.
# That way, most hammer functions won't need to be wrapped.
# (Probably need to wrap token, sequence and choice only).
# See http://www.elabs.se/blog/61-advanced-topics-in-ruby-ffi
typedef :pointer, :h_parser
54
+
55
# FFI data converter that maps between native token-type numbers and Ruby
# symbols, and keeps the registry of per-type unmarshal procs.
#
# The class variable @@from_hpt is read from outside this class (via
# class_variable_get), so its name must not change.
class HTokenType
  extend FFI::DataConverter

  # Symbol => native number for the built-in token types.
  @@known_type_map = {
    :none => 1,
    :bytes => 2,
    :sint => 4,
    :uint => 8,
    :sequence => 16,
  }

  # Native number => symbol.
  @@inverse_type_map = @@known_type_map.invert

  # Symbol => proc turning an HParsedToken of that type into a Ruby value.
  @@from_hpt = {
    :none => Proc.new { nil },
    :bytes => Proc.new {|hpt| hpt[:data][:bytes].token},
    :sint => Proc.new {|hpt| hpt[:data][:sint]},
    :uint => Proc.new {|hpt| hpt[:data][:uint]},
    :sequence => Proc.new {|hpt| hpt[:data][:seq].map {|x| x.unmarshal}},
  }

  # Registers a new token type. Note that this deliberately replaces
  # Class#new: it does not create an instance.
  #
  # @param name [Symbol, String] name of the token type to allocate
  # @yieldparam hpt the parsed token to convert when unmarshalling
  # @return [Proc, nil] the block that was registered
  def self.new(name, &block)
    # to_sym / to_s are no-ops on values that already have that type, so no
    # branching on the argument's class is needed.
    name_sym = name.to_sym
    name_str = name.to_s
    num = Hammer::Internal.h_allocate_token_type(name_str)
    @@known_type_map[name_sym] = num
    @@inverse_type_map[num] = name_sym
    @@from_hpt[name_sym] = block
  end

  # Looks up the native number for +name+, asking the native library (and
  # caching the answer) if it is not known yet.
  #
  # @raise [ArgumentError] if the native library reports no such type
  def self.from_name(name)
    unless @@known_type_map.key? name
      num = Hammer::Internal.h_get_token_type_number(name.to_s)
      if num <= 0
        raise ArgumentError, "Unknown token type #{name}"
      end
      @@known_type_map[name] = num
      @@inverse_type_map[num] = name
    end
    return @@known_type_map[name]
  end

  # Looks up the symbol for the native number +num+, asking the native
  # library (and caching the answer) if it is not known yet.
  #
  # @return [Symbol, nil] nil when the native library has no name for it
  def self.from_num(num)
    unless @@inverse_type_map.key? num
      name = Hammer::Internal.h_get_token_type_name(num)
      if name.nil?
        return nil
      end
      name = name.to_sym
      @@known_type_map[name] = num
      @@inverse_type_map[num] = name
    end
    return @@inverse_type_map[num]
  end

  # FFI::DataConverter hook: token types travel as a native int.
  def self.native_type
    FFI::Type::INT
  end

  # FFI::DataConverter hook: integers pass through, names are resolved.
  def self.to_native(val, ctx)
    return val if val.is_a?(Integer)
    return from_name(val)
  end

  # FFI::DataConverter hook: returns the symbol for +val+, or the raw
  # number when no name is known.
  def self.from_native(val, ctx)
    return from_num(val) || val
  end
end
128
+
129
# Native token-type registry functions, used by HTokenType.
# Define these as soon as possible, so that they can be used
# without fear elsewhere
attach_function :h_allocate_token_type, [:string], :int
attach_function :h_get_token_type_number, [:string], :int
attach_function :h_get_token_type_name, [:int], :string
134
+
135
# FFI struct describing a counted array of parsed-token pointers.
class HCountedArray < FFI::Struct
  layout :capacity, :size_t,
         :used, :size_t,
         :arena, :pointer,
         :elements, :pointer # HParsedToken**

  # Number of slots in use.
  def length
    self[:used]
  end

  # Wraps each of the first +used+ pointers in an HParsedToken.
  #
  # @return [Array<HParsedToken>]
  def elements
    # Re-type the raw pointer so that indexing advances by pointer size.
    elem_array = FFI::Pointer.new(:pointer, self[:elements])
    return (0...self[:used]).map { |i| HParsedToken.new(elem_array[i].read_pointer) }
  end

  #def [](idx)
  #  raise ArgumentError, "Index out of range" unless idx >= 0 and idx < length
  #  elem_array = FFI::Pointer.new(:pointer, self[:elements])
  #  return HParsedToken.new(elem_array[idx].read_pointer)
  #end

  # Maps the block over the wrapped elements.
  def map(&code)
    elements.map {|x| code.call x}
  end

  # Calls the block once per wrapped element.
  def each(&code)
    elements.each {|x| code.call x}
  end
end
163
+
164
# FFI struct holding a pointer to raw bytes and their length.
class HBytes < FFI::Struct
  layout :token, :pointer, # uint8_t*
         :len, :size_t

  # Reads +len+ bytes from the token pointer.
  #
  # @return [String]
  def token
    # TODO: Encoding?
    # Should be the same encoding as the string the token was created with.
    # But how do we get to this knowledge at this point?
    # Cheap solution: Just ask the user (additional parameter with default value of UTF-8).
    self[:token].read_string(self[:len])
  end

  # Length of the byte run.
  # TODO: Probably should rename this to match ruby conventions: length, count, size
  def len
    self[:len]
  end
end
181
+
182
# FFI struct pairing an HBytes reference with a numeric encoding field.
class HString < FFI::Struct
  layout :content, HBytes.by_ref,
         :encoding, :uint64

  # Returns the content bytes tagged with the stored encoding.
  #
  # The :encoding field is treated as a Ruby object id and resolved with
  # ObjectSpace._id2ref.
  # NOTE(review): this presumably relies on the referenced Encoding object
  # still being alive and on _id2ref being available on the running Ruby
  # -- confirm.
  def token
    return self[:content].token.force_encoding(
      ObjectSpace._id2ref(self[:encoding]))
  end
end
190
+
191
# Register the Ruby-specific token types together with the procs that turn
# a parsed token of that type back into a Ruby value.

# Token whose user pointer refers to an HString.
HTokenType.new(:"com.upstandinghackers.hammer.ruby.encodedStr") {|hpt|
  hpt.user(HString).token
}
# Token whose uint payload is treated as a Ruby object id.
HTokenType.new(:"com.upstandinghackers.hammer.ruby.object") {|hpt|
  ObjectSpace._id2ref(hpt[:data][:uint])
}
197
+
198
# FFI union of the payloads a parsed token can carry. Which member is
# meaningful is decided by the owning token's token type.
class HParsedTokenDataUnion < FFI::Union
  layout :bytes, HBytes.by_value,
         :sint, :int64,
         :uint, :uint64,
         :dbl, :double,
         :flt, :float,
         :seq, HCountedArray.by_ref,
         :user, :pointer
end
207
+
208
# FFI struct for a single node of the parse tree.
class HParsedToken < FFI::Struct
  layout :token_type, HTokenType,
         :data, HParsedTokenDataUnion.by_value,
         :index, :size_t,
         :bit_offset, :char

  # Returns nil when the underlying pointer is null, otherwise self.
  def normalize
    # If I'm null, return nil.
    return nil if null?
    return self
  end

  # Token type as converted by HTokenType (a symbol, or the raw number when
  # no name is known).
  def token_type
    self[:token_type]
  end

  # Returns the payload matching the token type, or nil for any type not
  # listed here.
  # TODO: Is this name ok?
  def data
    case token_type
    when :bytes then self[:data][:bytes].token
    when :sint then self[:data][:sint]
    when :uint then self[:data][:uint]
    when :sequence then self[:data][:seq].elements
    when :user then self[:data][:user]
    end
  end

  # Raw HBytes payload.
  #
  # @raise [ArgumentError] unless this is a :bytes token
  def bytes
    raise ArgumentError, 'wrong token type' unless token_type == :bytes
    self[:data][:bytes]
  end

  # Raw HCountedArray payload.
  #
  # @raise [ArgumentError] unless this is a :sequence token
  def seq
    raise ArgumentError, 'wrong token type' unless token_type == :sequence
    self[:data][:seq]
  end

  def index
    self[:index]
  end

  def bit_offset
    self[:bit_offset]
  end

  # Interprets the user pointer as a reference to the given struct class.
  #
  # @param struct [Class] an FFI::Struct subclass
  def user(struct)
    struct.by_ref.from_native(self[:data][:user], nil)
  end

  # Converts this token into a plain Ruby value using the proc registered
  # for its token type in HTokenType.
  #
  # @raise [ArgumentError] if no proc is registered for the token type
  #   (previously this surfaced as a NoMethodError on nil)
  def unmarshal
    converter = Hammer::Internal::HTokenType.class_variable_get(:@@from_hpt)[token_type]
    if converter.nil?
      raise ArgumentError, "no unmarshal handler registered for token type #{token_type.inspect}"
    end
    converter.call self
  end
end
259
+
260
# FFI struct for the result of running a parser.
class HParseResult < FFI::Struct
  layout :ast, HParsedToken.by_ref,
         :bit_length, :long_long,
         :arena, :pointer

  # Root of the parse tree, or nil when the ast pointer is null.
  def ast
    self[:ast].normalize
  end

  def bit_length
    self[:bit_length]
  end

  # Frees a non-null native parse result through h_parse_result_free.
  # Presumably invoked by FFI for results returned via
  # HParseResult.auto_ptr -- confirm against the FFI documentation.
  def self.release(ptr)
    Hammer::Internal.h_parse_result_free(ptr) unless ptr.null?
  end

  # Allocates a struct of the given type from this result's arena.
  #
  # @param type [Class] an FFI::Struct subclass
  def arena_alloc(type)
    Hammer::Internal.arena_alloc(self[:arena], type)
  end
end
281
+
282
# Allocates +type.size+ bytes from +arena+ with h_arena_malloc and returns
# the memory wrapped as a by-reference instance of +type+.
#
# @param arena [FFI::Pointer] arena to allocate from
# @param type [Class] an FFI::Struct subclass
def self.arena_alloc(arena, type)
  ptr = h_arena_malloc(arena, type.size)
  return type.by_ref.from_native(ptr, nil)
end
286
+
287
# Bindings for the native hammer functions. Parsers are exchanged as
# :h_parser, the pointer alias declared above.

# run a parser
attach_function :h_parse, [:h_parser, :pointer, :size_t], HParseResult.auto_ptr # TODO: Use :buffer_in instead of :pointer for the input?

# build a parser
attach_function :h_token, [:buffer_in, :size_t], :h_parser
attach_function :h_ch, [:uint8], :h_parser
attach_function :h_ch_range, [:uint8, :uint8], :h_parser
attach_function :h_int_range, [:h_parser, :int64, :int64], :h_parser
attach_function :h_bits, [:size_t, :bool], :h_parser
attach_function :h_int64, [], :h_parser
attach_function :h_int32, [], :h_parser
attach_function :h_int16, [], :h_parser
attach_function :h_int8, [], :h_parser
attach_function :h_uint64, [], :h_parser
attach_function :h_uint32, [], :h_parser
attach_function :h_uint16, [], :h_parser
attach_function :h_uint8, [], :h_parser
attach_function :h_whitespace, [:h_parser], :h_parser
attach_function :h_left, [:h_parser, :h_parser], :h_parser
attach_function :h_right, [:h_parser, :h_parser], :h_parser
attach_function :h_middle, [:h_parser, :h_parser, :h_parser], :h_parser
attach_function :h_in, [:pointer, :size_t], :h_parser
attach_function :h_not_in, [:pointer, :size_t], :h_parser
attach_function :h_end_p, [], :h_parser
attach_function :h_nothing_p, [], :h_parser
attach_function :h_sequence, [:varargs], :h_parser
attach_function :h_choice, [:varargs], :h_parser
attach_function :h_butnot, [:h_parser, :h_parser], :h_parser
attach_function :h_difference, [:h_parser, :h_parser], :h_parser
attach_function :h_xor, [:h_parser, :h_parser], :h_parser
attach_function :h_many, [:h_parser], :h_parser
attach_function :h_many1, [:h_parser], :h_parser
attach_function :h_repeat_n, [:h_parser, :size_t], :h_parser
attach_function :h_optional, [:h_parser], :h_parser
attach_function :h_ignore, [:h_parser], :h_parser
attach_function :h_sepBy, [:h_parser, :h_parser], :h_parser
attach_function :h_sepBy1, [:h_parser, :h_parser], :h_parser
attach_function :h_epsilon_p, [], :h_parser
attach_function :h_length_value, [:h_parser, :h_parser], :h_parser
attach_function :h_and, [:h_parser], :h_parser
attach_function :h_not, [:h_parser], :h_parser

# indirect parsers: created first, bound to a real parser later
attach_function :h_indirect, [], :h_parser
attach_function :h_bind_indirect, [:h_parser, :h_parser], :void

# semantic action: receives a parse result, returns a parsed token
callback :HAction, [HParseResult.by_ref], HParsedToken.by_ref
attach_function :h_action, [:h_parser, :HAction], :h_parser

# predicate: receives a parse result, returns a boolean
callback :HPredicate, [HParseResult.by_ref], :bool
attach_function :h_attr_bool, [:h_parser, :HPredicate], :h_parser

# free the parse result
attach_function :h_parse_result_free, [HParseResult.by_ref], :void

# TODO: Does the HParser* need to be freed?

# Add the arena
attach_function :h_arena_malloc, [:pointer, :size_t], :pointer
345
+ end
346
+ end