rsec 0.3.2 → 0.3.6
Sign up to get free protection for your applications and to get access to all the features.
- data/examples/little_markdown.rb +167 -0
- data/examples/nasm_manual.rb +136 -0
- data/lib/rsec/helpers.rb +3 -3
- data/lib/rsec/parsers/misc.rb +2 -2
- data/lib/rsec/utils.rb +1 -0
- data/readme.rdoc +1 -1
- data/test/test_misc.rb +4 -0
- metadata +28 -18
@@ -0,0 +1,167 @@
|
|
1
|
+
# a markdown translator
|
2
|
+
#
|
3
|
+
# The differences between this and original markdown:
|
4
|
+
# - markdown inside inline tags is not processed
|
5
|
+
# - every line-break in non-tag parts is translated into <br/>
|
6
|
+
# - nested list elements are not supported
|
7
|
+
|
8
|
+
require "rsec"
|
9
|
+
|
10
|
+
# Translates a small Markdown dialect into HTML.
#
# The input is scanned by a pseudo XML tag parser and a single-character
# parser. Recognised tags are copied to the result as they are; all other
# characters are collected in a buffer and translated as Markdown whenever the
# buffer is flushed.
class LittleMarkdown
  include Rsec::Helper

  # Builds both parsers once, so one instance can translate many documents.
  def initialize
    @markdown_line_translator = make_markdown_line_translator
    @parser = (make_xml_tag_parser | make_char_parser).star.eof
  end

  # Translates +src+ and returns the generated HTML string.
  # The tag stack, the character buffer and the output are reset first, so
  # nothing from a previous call leaks into this one.
  def translate src
    @stack = []
    @charsbuf = ''
    @out = ''
    @parser.parse! src
    flush_chars # translate the text still buffered after the last tag
    @out
  end

  # Translates the buffered non-tag characters as Markdown, appends the result
  # to the output and empties the buffer.
  def flush_chars
    @out << translate_markdown(@charsbuf)
    @charsbuf = ''
  end

  # Parser for a single character that is not part of a tag; every matched
  # character is appended to the character buffer.
  def make_char_parser
    # beware of StringScanner's bug, see issues
    # (/./ alone does not match a newline, hence the explicit /\n/ branch)
    (/./.r | /\n/).fail('char'){|c| @charsbuf << c}
  end

  # make a single-line markdown parser
  #
  # The returned parser must consume a whole line and yields the translated
  # HTML for it as one joined string.
  def make_markdown_line_translator
    # +line+ is assigned further down; lazy defers the lookup until parse time
    line_text = lazy{line}.map{|tokens|
      tokens.empty? ? Rsec::INVALID : tokens.join # filter out empty
    }

    # optional quoted title, rendered as a title attribute (quotes are kept)
    title = /"[^"]*"|'[^']*'/.r._?{|(s)|
      s ? "title=#{s}" : ''
    }
    img = seq('!['.r >> /[^\]]+/ << '](', /[^\)"']+/, title, ')'){|(txt, path, title)|
      "<img src='#{path}' #{title}>#{txt}</img>"
    }
    link = seq(('['.r >> /[^\]]+/ << ']('), /[^\)"']+/, title, ')'){|(txt, path, title)|
      "<a href='#{path}' #{title}>#{txt}</a>"
    }
    # NOTE strong should be left of em
    strong = ('**'.r >> line_text << '**').map{|s|
      "<strong>#{s}</strong>"
    }
    em = ('*'.r >> line_text << '*').map{|s|
      "<em>#{s}</em>"
    }
    code = ('`'.r >> /[^`]+/ << '`').map{|s|
      "<code>#{s}</code>"
    }
    # '<' and '&' become HTML entities (mapping them to themselves would leave
    # raw markup in the output); a backslash-escaped markdown character yields
    # the bare character
    escape = '<'.r{'&lt;'} | '&'.r{'&amp;'} | /\\[\!\`\*\[\]]/.r{|s|s[1]}
    text = /[^\!\`\*\[\]]+/
    id = seq_(('['.r >> /[^\]]+/ << ']:'), text){|(id, text)|
      "<span id='#{id}'>#{text}</span>"
    }
    line = (img | link | strong | em | code | escape | id | text).star
    line.eof.map &:join
  end

  # pseudo xml tag parser, except <br> and <hr> and <script>
  #
  # A matched tag is written straight to the output when the buffered text
  # ends with a newline; otherwise it is appended to the character buffer as
  # an inline tag.
  def make_xml_tag_parser
    name = /[\w-]+/ # greedy, no need to worry space between first attr
    value = /"[^"]*"|'[^']*'/
    attr = seq_(name, seq_('=', value)._?)
    attrs = /\s*/.r.join(attr)

    # use a stack to ensure tag matching
    tag_start = seq('<', name, attrs){|res|
      @stack.push res[1].downcase
      res
    }
    tag_empty_end = '/>'.r{|res|
      @stack.pop
      res
    }
    # the closing name must equal the most recently opened tag name
    tag_non_empty_end = seq('>', lazy{content}, '</', name, /\s*\>/){|res|
      if @stack.pop == res[3].downcase
        res
      else
        Rsec::INVALID
      end
    }
    special_tag = /\<[bh]r\s*\>/i.r | seq_('<script', attrs, /\>.*?\<\/script\>/)
    tag = special_tag | seq(tag_start, (tag_empty_end | tag_non_empty_end))

    # xml content
    comment = /<!--([^-]|-[^-])*-->/
    cdata = /<!\[CDATA\[.*?\]\]>/x
    entity = /&(nbsp|lt|gt|amp|cent|pound|yen|euro|sect|copy|reg|trade|#[a-f0-9]{2,4});/i
    text = /[^<&]+/
    content = (cdata.r | comment | entity | tag | text).star
    tag.fail('tag'){|res|
      if @charsbuf.end_with? "\n"
        flush_chars
        @out << res.join
      else
        @charsbuf << res.join # inline tags
      end
    }
  end

  # translate markdown
  #
  # Splits +str+ into lines, groups consecutive lines that share the same
  # leading marker (four spaces, '#'..'######' plus a space, '+ ', '- ',
  # '> ' or none) and renders every group. Groups are joined with <br/>, and
  # one <br/> is appended per trailing newline of +str+.
  def translate_markdown str
    lines = str.split("\n").chunk{|line|
      line[/^(\ {4}|\#{1,6}\ |[\+\-\>]\ |)/]
    }.map{|(leading, lines)|
      case leading
      when '    ' # code: the chunk key for such lines is exactly four spaces
        "<pre><code>#{lines.join "\n"}</code></pre>"
      when /\#{1,6}/ # headings
        hn = "h#{leading.strip.size}"
        lines.map! do |line|
          line = line.sub(/\#{1,6}/, '')
          "<#{hn}>#{@markdown_line_translator.parse! line}</#{hn}>"
        end
        lines.join
      when '> ' # block quote
        # TODO nested
        lines.map! do |line|
          @markdown_line_translator.parse! line[2..-1]
        end
        "<blockquote>#{lines.join '<br/>'}</blockquote>"
      when '+ ' # numbered list
        # TODO nested
        lines.map! do |line|
          "<li>#{@markdown_line_translator.parse! line[2..-1]}</li>"
        end
        "<ol>#{lines.join}</ol>"
      when '- ' # unordered list
        # TODO nested
        lines.map! do |line|
          "<li>#{@markdown_line_translator.parse! line[2..-1]}</li>"
        end
        "<ul>#{lines.join}</ul>"
      else
        lines.map! do |line|
          @markdown_line_translator.parse! line
        end
        lines.join "<br/>"
      end
    }
    # split drops trailing newlines, so add one <br/> for each of them
    lines.join('<br/>') << ('<br/>' * str[/\n*\Z/].size)
  end

end
|
158
|
+
|
159
|
+
# Demo: when this file is run directly, translate a small sample document
# and print the resulting HTML.
if $PROGRAM_NAME == __FILE__
  sample = <<-MD
## *a *
<pre a="3">123afd</pre>
** b **
  MD
  translator = LittleMarkdown.new
  puts translator.translate(sample)
end
|
167
|
+
|
@@ -0,0 +1,136 @@
|
|
1
|
+
# Parse NASM manual [nasm_manual.txt] and generate a list of opcodes.
|
2
|
+
# Results are saved in [nasm_codes.txt]; lines that cannot be parsed (including undocumented codes) are printed.
|
3
|
+
# Further: extend the parser to generate an X86 assembler.
|
4
|
+
require "rsec"
|
5
|
+
|
6
|
+
# Parses opcode definition lines of the NASM manual with Rsec parsers.
# All methods are callable on the module itself (extend self).
module NASMManualParser
  include Rsec::Helper
  extend self

  Instructions = {}

  # Raised for instruction definitions this parser deliberately rejects.
  class UnSupportedError < RuntimeError
  end

  # One parsed opcode definition.
  class Instruction < Struct.new(:nemonic, :operands, :code, :archs)
  end

  # Self-check helper: when the global $debug flag is set, every sample in
  # +strs+ is parsed completely with +parser+. Always returns +parser+.
  def debug parser, *strs
    return parser unless $debug
    strs.each do |str|
      parser.eof.parse! str
    end
    parser
  end

  # Parser for a register name.
  def reg_parser
    gp_reg = /E?[ABCD]X|E?(SP|BP|SI|DI)/
    gp_reg8 = /[ABCD][HL]/
    seg_reg = /ES|CS|SS|DS|FS|GS/
    fpu_reg = /ST[0-7]/
    mmx_reg = /MM[0-7]/
    xr_reg = /CR[0234]|DR[012367]|TR[34567]/
    reg = gp_reg.r | gp_reg8 | seg_reg | fpu_reg | mmx_reg | xr_reg
    debug reg, 'AX'
  end

  # Parser for an operand list: operands separated by ',', each operand being
  # '/'-separated alternatives (operand class, register or single digit).
  def operands_parser
    imm_class = /imm:imm(32|16)|imm(32|16|8)?/
    mem_class = /mem(80|64|32|16|8)?/ # beware of the order
    reg_class = /reg(32|16|8)|(fpu|mmx|seg)reg/
    memoffs_class = /memoffs(32|16|8)/
    tr_class = 'TR3/4/5/6/7'
    # memoffs should be left of mem
    classes = (imm_class.r | memoffs_class | mem_class | reg_class | tr_class).fail 'operand class'
    reg = reg_parser.fail 'register'
    num = /\d/.r(&:to_i).fail 'num'
    operand = classes | reg | num
    operands = operand.join('/').even.join(',').even
    debug operands, 'reg32', 'AX,memoffs16'
  end

  # Parser for the whitespace-separated opcode encoding column.
  def code_parser
    plus_cc = /[0-9A-F][0-9A-F]\+cc/
    plus_r = /[0-9A-F][0-9A-F]\+r/
    hex = /[0-9A-F][0-9A-F]/.r {|s| s.to_i 16}
    slash = /\/[\dr]/
    imm_code = /i[bwd]/
    reg_code = /rw\/rd|r[bwd]/
    ref_code = /ow\/od|o[wd]/
    prefix_code = /[oa](32|16)/
    # the "+cc" / "+r" forms must be tried before the plain hex byte
    code = (
      plus_cc.r | plus_r | hex | slash |
      imm_code | reg_code | ref_code | prefix_code
    ).join(/\s+/).even
    debug code, 'o32 0F C8+r', 'o32 6B /r ib', 'o16 A1 ow/od'
  end

  # Parser for the bracketed architecture list, e.g. "[386,FPU]".
  # Yields a hash used as a set (arch => true) and raises UnSupportedError
  # when the list contains UNDOC.
  def archs_parser
    arch = symbol(/8086|186|286|386|486|PENT|P6|CYRIX|FPU|MMX|PRIV|UNDOC/)
    archs = ('['.r >> arch.join(',').even << ']').map do |archs|
      # map to set
      archs.inject({}){|h, arch|
        raise UnSupportedError, 'not implemented' if arch == 'UNDOC'
        h[arch] = true
        h
      }
    end
    debug archs, '[386,FPU]'
  end

  # Parser for a whole definition line: mnemonic, optional operands,
  # ';' followed by the code column, and the architecture list.
  # Yields an Instruction.
  def instruction_parser
    nemonic = /[A-Z]\w+|xxSAR/
    operands = operands_parser._?
    code = ';'.r >> code_parser
    archs = archs_parser
    instruction = seq_ nemonic, operands, code, archs do |nemonic, (operands), code, archs|
      Instruction.new nemonic, operands, code, archs
    end
    debug instruction, 'FISUBR mem32 ; DA /5 [8086,FPU]', 'BSWAP reg32 ; o32 0F C8+r [486]'
  end

  # Returns a rewritten copy of +line+ that the parsers can handle; +line+
  # itself is not modified.
  def desugar line
    # r/m short hands (gsub returns a copy, the bang calls below edit that copy)
    line = line.gsub(/r\/m(32|16|8)/, 'reg\1/mem\1')
    line.gsub!('r/m64', 'mmxreg/mem64')
    # compress space: glue the first TO/NEAR/FAR/SHORT keyword to the preceding token
    line.sub!(/\s(TO|NEAR|FAR|SHORT)/, '_\1')
    line
  end

  # Parses one definition line with +parser+.
  # Returns the parse result, or nil when the line has a syntax error or
  # uses an unsupported feature.
  def parse_line parser, line
    parser.parse! desugar(line)
  rescue Rsec::SyntaxError, UnSupportedError
    nil
  end

  # Reads +filename+ and returns a string made of the raw definition lines
  # that were parsed successfully. Definition-shaped lines that fail to parse
  # are printed with their zero-based line index.
  def parse filename
    parsed = ''
    parser = instruction_parser.eof
    src = File.read filename
    # each_line.with_index works whether String#lines returns an Enumerator
    # (Ruby 1.9) or an Array (Ruby 2.0+, which has no #with_index)
    src.each_line.with_index do |raw_line, idx|
      line = raw_line.strip
      # this shape shows the line is something defining a mnemonic
      if line =~ /^\w+\s+[^;\[]+;\ [^;\[]+\[.+\]$/
        if parse_line(parser, line)
          parsed << raw_line
        else
          puts "unparsed:#{idx}\t#{line}"
        end
      end
    end
    parsed
  end

end
|
125
|
+
|
126
|
+
# When run directly: parse the manual that sits next to this file, with the
# parser self-checks enabled, and write the accepted lines to nasm_codes.txt.
if $PROGRAM_NAME == __FILE__
  $debug = true
  here = File.dirname(__FILE__)
  manual = "#{here}/nasm_manual.txt"
  codes = "#{here}/nasm_codes.txt"
  # the output file is opened before parsing starts, as in the original order
  File.open(codes, 'w') { |file| file << NASMManualParser.parse(manual) }
  puts('-' * 80)
  puts("X86 asm codes are saved to #{codes}")
end
|
136
|
+
|
data/lib/rsec/helpers.rb
CHANGED
@@ -212,7 +212,7 @@ module Rsec #:nodoc:
|
|
212
212
|
|
213
213
|
# @ desc
|
214
214
|
# Repeat n or in a range.
|
215
|
-
# If range.end
|
215
|
+
# If range.end < 0, repeat at least range.begin
|
216
216
|
# (Infinity and -Infinity are considered)
|
217
217
|
def * n, &p
|
218
218
|
# FIXME if self is an epsilon parser, will cause infinite loop
|
@@ -329,8 +329,8 @@ module Rsec #:nodoc:
|
|
329
329
|
# @ desc
|
330
330
|
# Think about "innerHTML"!
|
331
331
|
# @ example
|
332
|
-
# parser = seq('
|
333
|
-
# parser.parse('
|
332
|
+
# parser = seq('<b>', /[\w\s]+/, '</b>').inner
|
333
|
+
# parser.parse('<b>the inside</b>')
|
334
334
|
def inner &p
|
335
335
|
Inner[self].map p
|
336
336
|
end
|
data/lib/rsec/parsers/misc.rb
CHANGED
@@ -14,8 +14,6 @@ module Rsec #:nodoc
|
|
14
14
|
class Fail < Binary
|
15
15
|
def Fail.[] left, tokens
|
16
16
|
# TODO mutex
|
17
|
-
@mask_bit ||= 0
|
18
|
-
@token_table ||= []
|
19
17
|
if @mask_bit > 1000
|
20
18
|
raise "You've created too many fail parsers, If it is your intention, call Rsec::Fail.reset when previous expect settings can be thrown away."
|
21
19
|
end
|
@@ -29,6 +27,7 @@ module Rsec #:nodoc
|
|
29
27
|
@mask_bit = 0
|
30
28
|
@token_table = []
|
31
29
|
end
|
30
|
+
Fail.reset
|
32
31
|
|
33
32
|
def Fail.get_tokens mask
|
34
33
|
res = []
|
@@ -116,6 +115,7 @@ module Rsec #:nodoc
|
|
116
115
|
end
|
117
116
|
|
118
117
|
# should be end-of-file after parsing
|
118
|
+
# FIXME seems parser keeps a state when using parse!, see nasm manual parse
|
119
119
|
class Eof < Unary
|
120
120
|
def _parse ctx
|
121
121
|
ret = some()._parse ctx
|
data/lib/rsec/utils.rb
CHANGED
data/readme.rdoc
CHANGED
data/test/test_misc.rb
CHANGED
metadata
CHANGED
@@ -1,24 +1,28 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rsec
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
3
|
+
version: !ruby/object:Gem::Version
|
5
4
|
prerelease:
|
5
|
+
version: 0.3.6
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- NS
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
|
12
|
+
|
13
|
+
date: 2011-03-08 00:00:00 +08:00
|
13
14
|
default_executable:
|
14
15
|
dependencies: []
|
16
|
+
|
15
17
|
description: Easy and extreme fast dynamic PEG parser combinator.
|
16
18
|
email:
|
17
19
|
executables: []
|
20
|
+
|
18
21
|
extensions: []
|
19
|
-
|
22
|
+
|
23
|
+
extra_rdoc_files:
|
20
24
|
- readme.rdoc
|
21
|
-
files:
|
25
|
+
files:
|
22
26
|
- license.txt
|
23
27
|
- readme.rdoc
|
24
28
|
- lib/rsec/helpers.rb
|
@@ -33,6 +37,8 @@ files:
|
|
33
37
|
- examples/arithmetic.rb
|
34
38
|
- examples/bnf.rb
|
35
39
|
- examples/c_minus.rb
|
40
|
+
- examples/little_markdown.rb
|
41
|
+
- examples/nasm_manual.rb
|
36
42
|
- examples/scheme.rb
|
37
43
|
- examples/slow_json.rb
|
38
44
|
- examples/s_exp.rb
|
@@ -55,26 +61,30 @@ files:
|
|
55
61
|
has_rdoc: true
|
56
62
|
homepage: http://rsec.heroku.com
|
57
63
|
licenses: []
|
64
|
+
|
58
65
|
post_install_message:
|
59
66
|
rdoc_options: []
|
60
|
-
|
67
|
+
|
68
|
+
require_paths:
|
61
69
|
- lib
|
62
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
71
|
none: false
|
64
|
-
requirements:
|
65
|
-
- -
|
66
|
-
- !ruby/object:Gem::Version
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
67
75
|
version: 1.9.1
|
68
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
69
77
|
none: false
|
70
|
-
requirements:
|
71
|
-
- -
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
version:
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: "0"
|
74
82
|
requirements: []
|
83
|
+
|
75
84
|
rubyforge_project:
|
76
|
-
rubygems_version: 1.
|
85
|
+
rubygems_version: 1.6.1
|
77
86
|
signing_key:
|
78
87
|
specification_version: 3
|
79
88
|
summary: Extreme Fast Parser Combinator for Ruby
|
80
89
|
test_files: []
|
90
|
+
|