hammer-parser 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b813026f979c544803273ae966bd9f1858e4f510
4
+ data.tar.gz: b48b3e11a02fc19be47139330a2a14e7c91558f6
5
+ SHA512:
6
+ metadata.gz: d33df5b0e64a59baf7d27ce600edfca7eabf6055fea496b4df5ac148cf036183d88959ac3c3a7d7bc56f23273e132d2409b308cc61177925536a914f7e317b16
7
+ data.tar.gz: 147863e19142b07d561f44f9ccd6b994d8553c7cd2ef7cf5b6d03b7507ffaa95d0aa76fc63d3d3d438397a599bcee124fbb691478548de7acc7d3765f8a72ca3
@@ -0,0 +1,101 @@
1
+ # hammer-parser
2
+
3
+ Ruby bindings for [hammer](https://github.com/UpstandingHackers/hammer), a parsing library.
4
+
5
+
6
+ ## Notes
7
+
8
+ * I called the gem `hammer-parser`, since there already is a [gem named `hammer`](https://rubygems.org/gems/hammer).
9
+
10
+
11
+ ## Development
12
+
13
+ 1. `cd src/bindings/ruby`.
14
+
15
+ 2. Run `bundle install` to install dependencies.
16
+
17
+ 3. Run `bundle console` to open `irb` with hammer loaded.
18
+
19
+ 4. To run tests, just run `bundle exec rake test`.
20
+
21
+
22
+ ## Installation
23
+
24
+ 1. Download the hammer source code, and make it available system wide with the bindings.
25
+
26
+ `git clone https://github.com/UpstandingHackers/hammer`
27
+
28
+ `cd hammer`
29
+
30
+ `scons bindings=ruby`
31
+
32
+ `sudo scons bindings=ruby install`
33
+
34
+ 2. On Linux, you will have to run
35
+
36
+ `sudo ldconfig`
37
+
38
+ 3. Build the gem
39
+ `gem build hammer-parser.gemspec`
40
+
41
+ 4. Install the gem
42
+ `gem install hammer-parser-x.x.x.gem`
43
+
44
+
45
+ ## Examples
46
+
47
+ Add hammer to your Gemfile.
48
+
49
+ `gem 'hammer-parser'`
50
+
51
+ Use hammer in your project.
52
+
53
+ `require 'hammer-parser'`
54
+
55
+ ### Building a parser
56
+
57
+ ```ruby
58
+ parser = Hammer::Parser.build {
59
+ token 'Hello '
60
+ choice {
61
+ token 'Mom'
62
+ token 'Dad'
63
+ }
64
+ token '!'
65
+ }
66
+ ```
67
+
68
+ Also possible:
69
+
70
+ ```ruby
71
+ parser = Hammer::ParserBuilder.new
72
+ .token('Hello ')
73
+ .choice(Hammer::Parser.token('Mom'), Hammer::Parser.token('Dad'))
74
+ .token('!')
75
+ .build
76
+ ```
77
+
78
+ More like hammer in C:
79
+
80
+ ```ruby
81
+ h = Hammer::Parser
82
+ parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), h.token('!'))
83
+ ```
84
+
85
+ ### Parsing
86
+
87
+ ```ruby
88
+ result = parser.parse 'Hello Mom!'
89
+ => #<HParseResult>
90
+ result = parser.parse 'Hello Someone!'
91
+ => nil
92
+ ```
93
+
94
+ The `parse` method returns an `HParseResult` object, which needs to be
95
+ kept around until you're entirely done with the parse tree, which can
96
+ be accessed with `result.ast`.
97
+
98
+ While the AST can be accessed using the same interface as the C
99
+ HParsedToken type, we recommend using `result.ast.unmarshal` instead.
100
+ This converts the entire parse tree into a standalone Ruby-native
101
+ data structure which will likely be much easier to work with.
@@ -0,0 +1,47 @@
1
+ require 'hammer/internal'
2
+ require 'hammer/parser'
3
+ require 'hammer/parser_builder'
4
+
5
# Scratch code kept around for experimenting with HParseResult in irb.
# Everything below runs when this file is loaded and leaves its last parse
# result in the global $r.
x = nil
parser = Hammer::Parser.build do
  token 'abc'
  x = indirect
  end_p
end
x.bind(Hammer::Parser.token('abd'))

#$p = parser
$r = parser.parse('abcabd')

#p $r[:ast][:data][:seq].elements.map {|e| e[:data][:bytes].token }


# Second experiment: an action that doubles every parsed byte.
h = Hammer::Parser
doubled_byte = h.action(h.uint8) do |r|
  #p "TT=#{r[:ast][:token_type]}, value=#{r[:ast][:data][:uint]}"
  r.data * 2
end
parser = h.many(doubled_byte)

#parser = Hammer::Parser.build {
#  many {
#    uint8
#    action { |r|
#      p r
#      r[:ast]
#    }
#  }
#}

$r = parser.parse('abcdefgh')

#p $r[:ast][:data][:seq].elements.map {|e| e[:data][:uint]}
# or:
#p $r.ast.data.map(&:data)


# Third experiment: a predicate that only accepts bytes up to 100.
h = Hammer::Parser
small_byte = h.attr_bool(h.uint8) { |r| r.data <= 100 }
parser = h.many(small_byte)
#p parser.parse('abcdefgh').ast.data.map(&:data)
@@ -0,0 +1,346 @@
1
+ require 'ffi'
2
+
3
+ module Hammer
4
+ module Internal
5
+ extend FFI::Library
6
+
7
+ ffi_lib 'hammer'
8
+
9
# A dynamically scoped variable whose value is kept per thread of execution
# in `Thread.current[...]` under a unique, generated symbol.
#
# The constructor takes either a default value or a block:
# * default only: every thread starts out with that object;
# * block only: the block is lazily called (with nil) the first time a thread
#   reads the variable, and its result becomes the initial value;
# * both: the block is called with the default value.
#
# The `name` argument is accepted but currently unused.
class DynamicVariable
  SYMBOL_PREFIX = "Hammer::Internal::DynamicVariable gensym "
  @@current_symbol = 0

  def initialize(default=nil, name=nil, &block)
    @default = default
    @block = block || Proc.new { |x| x }
    @@current_symbol += 1
    @sym = (SYMBOL_PREFIX + @@current_symbol.to_s).to_sym
  end

  # Returns the current value, running the initializer block on first access.
  def value
    slot = Thread.current[@sym]
    if slot.nil?
      slot = [@block.call(@default)].freeze
      Thread.current[@sym] = slot
    end
    slot.first
  end

  # Sets the current value.
  #
  # The value is stored inside a one-element array because assigning nil
  # through Thread#[]= removes the key; storing nil directly would make the
  # variable fall back to its default on the next read.
  def value=(new_value)
    Thread.current[@sym] = [new_value].freeze
  end

  # Runs the block with the variable temporarily set to new_value, restoring
  # the previous value afterwards (even if the block raises), and returns
  # the block's result.
  def with(new_value, &block)
    old_value = value
    begin
      self.value = new_value
      return block.call
    ensure
      self.value = old_value
    end
  end
end
48
+
49
+ # Maybe we can implement Hammer::Parser with FFI::DataConverter.
50
+ # That way, most hammer functions won't need to be wrapped.
51
+ # (Probably need to wrap token, sequence and choice only).
52
+ # See http://www.elabs.se/blog/61-advanced-topics-in-ruby-ffi
53
+ typedef :pointer, :h_parser
54
+
55
# FFI data converter for hammer token types: translates between token-type
# names (Symbols) on the Ruby side and the integer ids used natively.
#
# Three class-level tables are maintained:
# * @@known_type_map   -- name => id, seeded with the built-in types;
# * @@inverse_type_map -- id => name;
# * @@from_hpt         -- name => Proc converting an HParsedToken of that
#                         type to a Ruby value (read by HParsedToken#unmarshal).
class HTokenType
  extend FFI::DataConverter

  @@known_type_map = {
    :none => 1,
    :bytes => 2,
    :sint => 4,
    :uint => 8,
    :sequence => 16,
  }

  @@inverse_type_map = @@known_type_map.invert

  @@from_hpt = {
    :none => Proc.new { nil },
    :bytes => Proc.new {|hpt| hpt[:data][:bytes].token},
    :sint => Proc.new {|hpt| hpt[:data][:sint]},
    :uint => Proc.new {|hpt| hpt[:data][:uint]},
    :sequence => Proc.new {|hpt| hpt[:data][:seq].map {|x| x.unmarshal}},
  }

  # Registers a new token type called `name` (Symbol or String) via
  # h_allocate_token_type and records `block` as its unmarshalling proc.
  # Note that this replaces the usual constructor: it returns the block,
  # not an HTokenType instance.
  def self.new(name, &block)
    name_sym = name.to_sym
    num = Hammer::Internal.h_allocate_token_type(name.to_s)
    @@known_type_map[name_sym] = num
    @@inverse_type_map[num] = name_sym
    @@from_hpt[name_sym] = block
  end

  # Returns the numeric id for `name`, asking the native library (and caching
  # the answer) when the name is not in the table yet.
  #
  # Names are normalised to Symbols first, so that a String argument cannot
  # create String keys or overwrite an existing id => Symbol entry.
  #
  # Raises ArgumentError if the native library does not know the name.
  def self.from_name(name)
    name = name.to_sym if name.respond_to?(:to_sym)
    unless @@known_type_map.key?(name)
      num = Hammer::Internal.h_get_token_type_number(name.to_s)
      raise ArgumentError, "Unknown token type #{name}" if num <= 0
      @@known_type_map[name] = num
      @@inverse_type_map[num] = name
    end
    @@known_type_map[name]
  end

  # Returns the name (Symbol) for the numeric id `num`, asking the native
  # library when needed; returns nil if the library has no name for it.
  def self.from_num(num)
    unless @@inverse_type_map.key?(num)
      name = Hammer::Internal.h_get_token_type_name(num)
      return nil if name.nil?
      name = name.to_sym
      @@known_type_map[name] = num
      @@inverse_type_map[num] = name
    end
    @@inverse_type_map[num]
  end

  # FFI::DataConverter hook: the native representation is an int.
  def self.native_type
    FFI::Type::INT
  end

  # FFI::DataConverter hook: Integers pass through, names are looked up.
  def self.to_native(val, ctx)
    return val if val.is_a?(Integer)
    from_name(val)
  end

  # FFI::DataConverter hook: yields the type name, or the raw number when
  # no name is known for it.
  def self.from_native(val, ctx)
    from_num(val) || val
  end
end
128
+
129
# Attach the token-type helpers first: the HTokenType registrations further
# down in this file call them while the file is still being loaded.
attach_function(:h_allocate_token_type, [:string], :int)
attach_function(:h_get_token_type_number, [:string], :int)
attach_function(:h_get_token_type_name, [:int], :string)
134
+
135
# Struct wrapper for a counted array whose :elements field points at an
# array of HParsedToken pointers.
class HCountedArray < FFI::Struct
  layout :capacity, :size_t,
         :used,     :size_t,
         :arena,    :pointer,
         :elements, :pointer # HParsedToken**

  # Number of slots in use.
  def length
    self[:used]
  end

  # Wraps each of the first :used entries in an HParsedToken and returns
  # them as a Ruby Array.
  def elements
    base = FFI::Pointer.new(:pointer, self[:elements])
    Array.new(self[:used]) do |idx|
      HParsedToken.new(base[idx].read_pointer)
    end
  end

  #def [](idx)
  #  raise ArgumentError, "Index out of range" unless idx >= 0 and idx < length
  #  elem_array = FFI::Pointer.new(:pointer, self[:elements])
  #  return HParsedToken.new(elem_array[i].read_pointer)
  #end

  # Maps the block over the wrapped elements.
  def map(&code)
    elements.map { |token| code.call(token) }
  end

  # Calls the block once per wrapped element.
  def each(&code)
    elements.each { |token| code.call(token) }
  end
end
163
+
164
# Struct wrapper for a (pointer, length) byte buffer.
class HBytes < FFI::Struct
  layout :token, :pointer, # uint8_t*
         :len,   :size_t

  # Reads :len bytes from the :token pointer and returns them as a String.
  #
  # TODO: Encoding? It should match the encoding of the string the token was
  # created with, but that is not known here. A cheap option would be an
  # extra parameter defaulting to UTF-8.
  def token
    buffer = self[:token]
    buffer.read_string(self[:len])
  end

  # Length of the buffer in bytes.
  # TODO: Probably should rename this to match ruby conventions: length, count, size
  def len
    self[:len]
  end
end
181
+
182
# Struct pairing a byte buffer with an :encoding field that holds the object
# id of the encoding to apply.
class HString < FFI::Struct
  layout :content,  HBytes.by_ref,
         :encoding, :uint64

  # Returns the content bytes as a String tagged with the encoding object
  # looked up from the stored object id.
  def token
    raw = self[:content].token
    raw.force_encoding(ObjectSpace._id2ref(self[:encoding]))
  end
end
190
+
191
# Token type whose payload is an HString; it unmarshals to the encoded String.
HTokenType.new(:"com.upstandinghackers.hammer.ruby.encodedStr") do |hpt|
  hpt.user(HString).token
end

# Token type whose :uint payload is an object id; it unmarshals to that object.
HTokenType.new(:"com.upstandinghackers.hammer.ruby.object") do |hpt|
  ObjectSpace._id2ref(hpt[:data][:uint])
end
197
+
198
# Union of the payload representations a parsed token can carry.
class HParsedTokenDataUnion < FFI::Union
  layout(
    :bytes, HBytes.by_value,
    :sint,  :int64,
    :uint,  :uint64,
    :dbl,   :double,
    :flt,   :float,
    :seq,   HCountedArray.by_ref,
    :user,  :pointer
  )
end
207
+
208
# Struct wrapper for a single parsed token: a type tag, a payload union, and
# the position (:index, :bit_offset) recorded with it.
class HParsedToken < FFI::Struct
  layout :token_type, HTokenType,
         :data,       HParsedTokenDataUnion.by_value,
         :index,      :size_t,
         :bit_offset, :char

  # Returns nil when the underlying pointer is null, self otherwise.
  def normalize
    null? ? nil : self
  end

  # The token's type, as converted by HTokenType.
  def token_type
    self[:token_type]
  end

  # Returns the payload matching the token type, or nil for any other type.
  # TODO: Is this name ok?
  def data
    case token_type
    when :bytes    then self[:data][:bytes].token
    when :sint     then self[:data][:sint]
    when :uint     then self[:data][:uint]
    when :sequence then self[:data][:seq].elements
    when :user     then self[:data][:user]
    end
  end

  # The raw HBytes payload; raises ArgumentError for non-bytes tokens.
  def bytes
    raise ArgumentError, 'wrong token type' if token_type != :bytes
    self[:data][:bytes]
  end

  # The raw HCountedArray payload; raises ArgumentError for non-sequence tokens.
  def seq
    raise ArgumentError, 'wrong token type' if token_type != :sequence
    self[:data][:seq]
  end

  def index
    self[:index]
  end

  def bit_offset
    self[:bit_offset]
  end

  # Interprets the :user payload pointer as an instance of `struct`.
  def user(struct)
    payload = self[:data][:user]
    struct.by_ref.from_native(payload, nil)
  end

  # Converts this token to a Ruby value using the proc registered for its
  # token type in HTokenType's @@from_hpt table.
  def unmarshal
    converters = Hammer::Internal::HTokenType.class_variable_get(:@@from_hpt)
    converters[token_type].call(self)
  end
end
259
+
260
# Struct wrapper for the result of a parse: the AST root, the bit length and
# the arena pointer stored alongside them.
class HParseResult < FFI::Struct
  layout :ast,        HParsedToken.by_ref,
         :bit_length, :long_long,
         :arena,      :pointer

  # Root token of the parse tree, or nil when the :ast pointer is null.
  def ast
    root = self[:ast]
    root.normalize
  end

  def bit_length
    self[:bit_length]
  end

  # Release hook: frees the native parse result unless the pointer is null.
  def self.release(ptr)
    return if ptr.null?
    Hammer::Internal.h_parse_result_free(ptr)
  end

  # Allocates an instance of `type` from this result's arena.
  def arena_alloc(type)
    Hammer::Internal.arena_alloc(self[:arena], type)
  end
end
281
+
282
# Allocates type.size bytes from `arena` with h_arena_malloc and returns the
# memory wrapped as an instance of `type`.
def self.arena_alloc(arena, type)
  raw = h_arena_malloc(arena, type.size)
  type.by_ref.from_native(raw, nil)
end
286
+
287
# run a parser
# TODO: consider :buffer_in for the input argument (it is declared as :pointer here).
attach_function :h_parse, [:h_parser, :pointer, :size_t], HParseResult.auto_ptr

# build a parser: each entry is [C function name, argument types]; all of
# them return :h_parser and are attached in the order listed.
[
  [:h_token,        [:buffer_in, :size_t]],
  [:h_ch,           [:uint8]],
  [:h_ch_range,     [:uint8, :uint8]],
  [:h_int_range,    [:h_parser, :int64, :int64]],
  [:h_bits,         [:size_t, :bool]],
  [:h_int64,        []],
  [:h_int32,        []],
  [:h_int16,        []],
  [:h_int8,         []],
  [:h_uint64,       []],
  [:h_uint32,       []],
  [:h_uint16,       []],
  [:h_uint8,        []],
  [:h_whitespace,   [:h_parser]],
  [:h_left,         [:h_parser, :h_parser]],
  [:h_right,        [:h_parser, :h_parser]],
  [:h_middle,       [:h_parser, :h_parser, :h_parser]],
  [:h_in,           [:pointer, :size_t]],
  [:h_not_in,       [:pointer, :size_t]],
  [:h_end_p,        []],
  [:h_nothing_p,    []],
  [:h_sequence,     [:varargs]],
  [:h_choice,       [:varargs]],
  [:h_butnot,       [:h_parser, :h_parser]],
  [:h_difference,   [:h_parser, :h_parser]],
  [:h_xor,          [:h_parser, :h_parser]],
  [:h_many,         [:h_parser]],
  [:h_many1,        [:h_parser]],
  [:h_repeat_n,     [:h_parser, :size_t]],
  [:h_optional,     [:h_parser]],
  [:h_ignore,       [:h_parser]],
  [:h_sepBy,        [:h_parser, :h_parser]],
  [:h_sepBy1,       [:h_parser, :h_parser]],
  [:h_epsilon_p,    []],
  [:h_length_value, [:h_parser, :h_parser]],
  [:h_and,          [:h_parser]],
  [:h_not,          [:h_parser]],

  [:h_indirect,     []],
].each do |c_name, arg_types|
  attach_function c_name, arg_types, :h_parser
end
attach_function :h_bind_indirect, [:h_parser, :h_parser], :void

# Callback types must be declared before the functions that take them.
callback :HAction, [HParseResult.by_ref], HParsedToken.by_ref
attach_function :h_action, [:h_parser, :HAction], :h_parser

callback :HPredicate, [HParseResult.by_ref], :bool
attach_function :h_attr_bool, [:h_parser, :HPredicate], :h_parser

# free the parse result
attach_function :h_parse_result_free, [HParseResult.by_ref], :void

# TODO: Does the HParser* need to be freed?

# Add the arena
attach_function :h_arena_malloc, [:pointer, :size_t], :pointer
345
+ end
346
+ end