rsec 0.3.2 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/examples/little_markdown.rb +167 -0
- data/examples/nasm_manual.rb +136 -0
- data/lib/rsec/helpers.rb +3 -3
- data/lib/rsec/parsers/misc.rb +2 -2
- data/lib/rsec/utils.rb +1 -0
- data/readme.rdoc +1 -1
- data/test/test_misc.rb +4 -0
- metadata +28 -18
@@ -0,0 +1,167 @@
|
|
1
|
+
# a markdown translator
|
2
|
+
#
|
3
|
+
# The differences between this and original markdown:
|
4
|
+
# - markdown inside inline tags is not processed
|
5
|
+
# - every line-break in non-tag parts is translated into <br/>
|
6
|
+
# - nested list elements are not supported
|
7
|
+
|
8
|
+
require "rsec"
|
9
|
+
|
10
|
+
# A small Markdown-to-HTML translator built from Rsec parser combinators.
#
# The input is scanned as a sequence of pseudo-XML tags and single
# characters. Characters are collected in a buffer that is translated as
# Markdown whenever it is flushed; a tag that follows a newline flushes the
# buffer and is appended to the output, any other tag is appended to the
# buffer (treated as an inline tag).
#
#   LittleMarkdown.new.translate(src)  # => HTML string
class LittleMarkdown
  include Rsec::Helper

  # Builds both parsers once so every #translate call can reuse them.
  def initialize
    @markdown_line_translator = make_markdown_line_translator
    @parser = (make_xml_tag_parser | make_char_parser).star.eof
  end

  # Translates +src+ and returns the generated HTML string.
  # The tag stack, the character buffer and the output are reset on entry.
  def translate src
    @stack = []
    @charsbuf = ''
    @out = ''
    @parser.parse! src
    # whatever is still buffered after the last tag has to be emitted too
    flush_chars
    @out
  end

  # Translates the buffered characters as Markdown, appends the result to the
  # output and starts a fresh buffer.
  def flush_chars
    @out << translate_markdown(@charsbuf)
    @charsbuf = ''
  end

  # Parser that accepts one character (/./ or a newline) and appends it to
  # the character buffer.
  def make_char_parser
    # care stringscanner's bug, see issues
    (/./.r | /\n/).fail('char'){|c| @charsbuf << c}
  end

  # make a single-line markdown parser
  #
  # The returned parser must consume the whole line (eof) and maps the parsed
  # tokens to one joined string.
  def make_markdown_line_translator
    # `line` is assigned at the end of this method; lazy defers the lookup.
    line_text = lazy{line}.map{|tokens|
      tokens.empty? ? Rsec::INVALID : tokens.join # filter out empty
    }

    # optional quoted title, rendered as a title attribute ('' when absent)
    title = /"[^"]*"|'[^']*'/.r._?{|(s)|
      s ? "title=#{s}" : ''
    }
    img = seq('!['.r >> /[^\]]+/ << '](', /[^\)"']+/, title, ')'){|(txt, path, title_attr)|
      "<img src='#{path}' #{title_attr}>#{txt}</img>"
    }
    link = seq(('['.r >> /[^\]]+/ << ']('), /[^\)"']+/, title, ')'){|(txt, path, title_attr)|
      "<a href='#{path}' #{title_attr}>#{txt}</a>"
    }
    # NOTE strong should be left of em
    strong = ('**'.r >> line_text << '**').map{|s|
      "<strong>#{s}</strong>"
    }
    em = ('*'.r >> line_text << '*').map{|s|
      "<em>#{s}</em>"
    }
    code = ('`'.r >> /[^`]+/ << '`').map{|s|
      "<code>#{s}</code>"
    }
    # '<' and '&' are emitted as HTML entities; a backslash-escaped markdown
    # punctuation character is emitted without the backslash.
    escape = '<'.r{'&lt;'} | '&'.r{'&amp;'} | /\\[\!\`\*\[\]]/.r{|s|s[1]}
    text = /[^\!\`\*\[\]]+/
    id = seq_(('['.r >> /[^\]]+/ << ']:'), text){|(anchor, body)|
      "<span id='#{anchor}'>#{body}</span>"
    }
    line = (img | link | strong | em | code | escape | id | text).star
    line.eof.map(&:join)
  end

  # pseudo xml tag parser, except <br> and <hr> and <script>
  #
  # On a successful match the joined tag text goes to the output when the
  # character buffer ends with a newline (after flushing the buffer), and to
  # the character buffer otherwise.
  def make_xml_tag_parser
    name = /[\w-]+/ # greedy, no need to worry space between first attr
    value = /"[^"]*"|'[^']*'/
    attr = seq_(name, seq_('=', value)._?)
    attrs = /\s*/.r.join(attr)

    # use a stack to ensure tag matching
    tag_start = seq('<', name, attrs){|res|
      @stack.push res[1].downcase
      res
    }
    tag_empty_end = '/>'.r{|res|
      @stack.pop
      res
    }
    # `content` is assigned further down; lazy defers the lookup.
    tag_non_empty_end = seq('>', lazy{content}, '</', name, /\s*\>/){|res|
      # the closing name has to match the most recently opened tag
      if @stack.pop == res[3].downcase
        res
      else
        Rsec::INVALID
      end
    }
    special_tag = /\<[bh]r\s*\>/i.r | seq_('<script', attrs, /\>.*?\<\/script\>/)
    tag = special_tag | seq(tag_start, (tag_empty_end | tag_non_empty_end))

    # xml content
    comment = /<!--([^-]|-[^-])*-->/
    cdata = /<!\[CDATA\[.*?\]\]>/x
    entity = /&(nbsp|lt|gt|amp|cent|pound|yen|euro|sect|copy|reg|trade|#[a-f0-9]{2,4});/i
    text = /[^<&]+/
    content = (cdata.r | comment | entity | tag | text).star
    tag.fail('tag'){|res|
      if @charsbuf.end_with? "\n"
        flush_chars
        @out << res.join
      else
        @charsbuf << res.join # inline tags
      end
    }
  end

  # translate markdown
  #
  # Consecutive lines that share the same leading marker (four spaces,
  # "#".."######" plus a space, "+ ", "- ", "> " or nothing) are grouped and
  # rendered as one block; blocks are joined with <br/>.
  def translate_markdown str
    blocks = str.split("\n").chunk{|line|
      line[/^(\ {4}|\#{1,6}\ |[\+\-\>]\ |)/]
    }.map{|(leading, group)|
      case leading
      when '    ' # code: the key produced by \ {4} above is exactly four spaces
        "<pre><code>#{group.join("\n")}</code></pre>"
      when /\#{1,6}/ # headings
        hn = "h#{leading.strip.size}"
        group.map { |line|
          line = line.sub(/\#{1,6}/, '')
          "<#{hn}>#{@markdown_line_translator.parse! line}</#{hn}>"
        }.join
      when '> ' # block quote
        # TODO nested
        quoted = group.map { |line| @markdown_line_translator.parse! line[2..-1] }
        "<blockquote>#{quoted.join('<br/>')}</blockquote>"
      when '+ ' # numbered list
        # TODO nested
        items = group.map { |line| "<li>#{@markdown_line_translator.parse! line[2..-1]}</li>" }
        "<ol>#{items.join}</ol>"
      when '- ' # unordered list
        # TODO nested
        items = group.map { |line| "<li>#{@markdown_line_translator.parse! line[2..-1]}</li>" }
        "<ul>#{items.join}</ul>"
      else
        group.map { |line| @markdown_line_translator.parse! line }.join("<br/>")
      end
    }
    # add trailing '\n' s
    blocks.join('<br/>') << ('<br/>' * str[/\n*\Z/].size)
  end

end
|
158
|
+
|
159
|
+
# Demo: when this file is run directly, translate a small sample and print it.
if $PROGRAM_NAME == __FILE__
  sample = <<-MD
## *a *
<pre a="3">123afd</pre>
** b **
MD
  translator = LittleMarkdown.new
  puts translator.translate(sample)
end
|
167
|
+
|
@@ -0,0 +1,136 @@
|
|
1
|
+
# Parse NASM manual [nasm_manual.txt] and generate a list of opcodes.
|
2
|
+
# Results are saved in [nasm_codes.txt], undocumented codes are printed.
|
3
|
+
# Further: extend the parser to generate an X86 assembler.
|
4
|
+
require "rsec"
|
5
|
+
|
6
|
+
# Rsec-based parser for the instruction definition lines of the NASM manual.
#
# NASMManualParser.parse(filename) returns the raw text of every definition
# line that could be parsed and prints the ones that could not.
module NASMManualParser
  include Rsec::Helper
  extend self

  Instructions = {}

  # Raised from the architecture parser for entries flagged UNDOC.
  class UnSupportedError < RuntimeError
  end

  # One parsed definition line: mnemonic, operands, opcode bytes/codes and
  # the architecture flags.
  class Instruction < Struct.new(:nemonic, :operands, :code, :archs)
  end

  # Self-check hook used by the parser builders below.
  # When the global $debug is set, +parser+ (with eof) is run with parse!
  # against every sample in +strs+. Always returns +parser+.
  def debug parser, *strs
    return parser unless $debug
    strs.each { |sample| parser.eof.parse! sample }
    parser
  end

  # Parser for a concrete register name (general purpose, 8-bit, segment,
  # FPU, MMX and control/debug/test registers).
  def reg_parser
    gp_reg = /E?[ABCD]X|E?(SP|BP|SI|DI)/
    gp_reg8 = /[ABCD][HL]/
    seg_reg = /ES|CS|SS|DS|FS|GS/
    fpu_reg = /ST[0-7]/
    mmx_reg = /MM[0-7]/
    xr_reg = /CR[0234]|DR[012367]|TR[34567]/
    reg = gp_reg.r | gp_reg8 | seg_reg | fpu_reg | mmx_reg | xr_reg
    debug reg, 'AX'
  end

  # Parser for an operand list: comma-separated operands, each of which is a
  # '/'-separated list of alternatives (operand class, register or digit).
  def operands_parser
    imm_class = /imm:imm(32|16)|imm(32|16|8)?/
    mem_class = /mem(80|64|32|16|8)?/ # beware of the order
    reg_class = /reg(32|16|8)|(fpu|mmx|seg)reg/
    memoffs_class = /memoffs(32|16|8)/
    tr_class = 'TR3/4/5/6/7'
    # memoffs should be left of mem
    classes = (imm_class.r | memoffs_class | mem_class | reg_class | tr_class).fail 'operand class'
    reg = reg_parser.fail 'register'
    num = /\d/.r(&:to_i).fail 'num'
    operand = classes | reg | num
    operands = operand.join('/').even.join(',').even
    debug operands, 'reg32', 'AX,memoffs16'
  end

  # Parser for the whitespace-separated opcode description that follows the
  # ';' of a definition line. Plain hex bytes are converted to integers.
  def code_parser
    plus_cc = /[0-9A-F][0-9A-F]\+cc/
    plus_r = /[0-9A-F][0-9A-F]\+r/
    hex = /[0-9A-F][0-9A-F]/.r {|s| s.to_i 16}
    slash = /\/[\dr]/
    imm_code = /i[bwd]/
    reg_code = /rw\/rd|r[bwd]/
    ref_code = /ow\/od|o[wd]/
    prefix_code = /[oa](32|16)/
    # plus_cc / plus_r come before hex, which would match their first two characters
    code = (plus_cc.r | plus_r | hex | slash |
            imm_code | reg_code | ref_code | prefix_code).join(/\s+/).even
    debug code, 'o32 0F C8+r', 'o32 6B /r ib', 'o16 A1 ow/od'
  end

  # Parser for the bracketed architecture list, e.g. "[386,FPU]".
  # The result is a hash used as a set (name => true).
  # Raises UnSupportedError when the list contains UNDOC.
  def archs_parser
    arch = symbol(/8086|186|286|386|486|PENT|P6|CYRIX|FPU|MMX|PRIV|UNDOC/)
    archs = ('['.r >> arch.join(',').even << ']').map do |names|
      # map to set
      names.each_with_object({}) do |arch_name, set|
        raise UnSupportedError, 'not implemented' if arch_name == 'UNDOC'
        set[arch_name] = true
      end
    end
    debug archs, '[386,FPU]'
  end

  # Parser for a whole definition line; yields an Instruction.
  def instruction_parser
    nemonic = /[A-Z]\w+|xxSAR/
    operands = operands_parser._?
    code = ';'.r >> code_parser
    archs = archs_parser
    instruction = seq_ nemonic, operands, code, archs do |name, (ops), bytes, cpus|
      Instruction.new name, ops, bytes, cpus
    end
    debug instruction, 'FISUBR mem32 ; DA /5 [8086,FPU]', 'BSWAP reg32 ; o32 0F C8+r [486]'
  end

  # Rewrites manual shorthands into the vocabulary the parsers understand.
  # Returns a new string; +line+ itself is not modified.
  def desugar line
    # r/m short hands
    line = line.gsub(/r\/m(32|16|8)/, 'reg\1/mem\1')
    line.gsub! 'r/m64', 'mmxreg/mem64'
    # compress space (first occurrence only)
    line.sub!(/\s(TO|NEAR|FAR|SHORT)/, '_\1')
    line
  end

  # Parses one definition line with +parser+.
  # Returns the parse result, or nil when the line has a syntax error or is
  # unsupported; callers use the nil to report the line as unparsed.
  def parse_line parser, line
    parser.parse! desugar line
  rescue Rsec::SyntaxError, UnSupportedError
    nil
  end

  # Reads +filename+ and parses every line that looks like an instruction
  # definition. Returns the concatenated raw text of the lines that parsed;
  # the others are printed with their zero-based line index.
  def parse filename
    parsed = ''
    parser = instruction_parser.eof
    src = File.read filename
    # each_line (not lines): String#lines returns an Array on Ruby >= 2.0,
    # and Array has no #with_index.
    src.each_line.with_index do |raw_line, idx|
      line = raw_line.strip
      # this shape shows the line is something defining a mnemonic
      next unless line =~ /^\w+\s+[^;\[]+;\ [^;\[]+\[.+\]$/
      if parse_line parser, line
        parsed << raw_line
      else
        puts "unparsed:#{idx}\t#{line}"
      end
    end
    parsed
  end

end
|
125
|
+
|
126
|
+
# Script entry point: parse the manual that sits next to this file and write
# the recognised definition lines to nasm_codes.txt in the same directory.
if $PROGRAM_NAME == __FILE__
  # enables the sample checks inside NASMManualParser#debug
  $debug = true
  here = File.dirname __FILE__
  manual_path = "#{here}/nasm_manual.txt"
  codes_path = "#{here}/nasm_codes.txt"
  File.open(codes_path, 'w') do |out|
    out << NASMManualParser.parse(manual_path)
  end
  puts '-' * 80
  puts "X86 asm codes are saved to #{codes_path}"
end
|
136
|
+
|
data/lib/rsec/helpers.rb
CHANGED
@@ -212,7 +212,7 @@ module Rsec #:nodoc:
|
|
212
212
|
|
213
213
|
# @ desc
|
214
214
|
# Repeat n or in a range.
|
215
|
-
# If range.end
|
215
|
+
# If range.end < 0, repeat at least range.begin
|
216
216
|
# (Infinity and -Infinity are considered)
|
217
217
|
def * n, &p
|
218
218
|
# FIXME if self is an epsilon parser, will cause infinite loop
|
@@ -329,8 +329,8 @@ module Rsec #:nodoc:
|
|
329
329
|
# @ desc
|
330
330
|
# Think about "innerHTML"!
|
331
331
|
# @ example
|
332
|
-
# parser = seq('
|
333
|
-
# parser.parse('
|
332
|
+
# parser = seq('<b>', /[\w\s]+/, '</b>').inner
|
333
|
+
# parser.parse('<b>the inside</b>')
|
334
334
|
def inner &p
|
335
335
|
Inner[self].map p
|
336
336
|
end
|
data/lib/rsec/parsers/misc.rb
CHANGED
@@ -14,8 +14,6 @@ module Rsec #:nodoc
|
|
14
14
|
class Fail < Binary
|
15
15
|
def Fail.[] left, tokens
|
16
16
|
# TODO mutex
|
17
|
-
@mask_bit ||= 0
|
18
|
-
@token_table ||= []
|
19
17
|
if @mask_bit > 1000
|
20
18
|
raise "You've created too many fail parsers, If it is your intention, call Rsec::Fail.reset when previous expect settings can be thrown away."
|
21
19
|
end
|
@@ -29,6 +27,7 @@ module Rsec #:nodoc
|
|
29
27
|
@mask_bit = 0
|
30
28
|
@token_table = []
|
31
29
|
end
|
30
|
+
Fail.reset
|
32
31
|
|
33
32
|
def Fail.get_tokens mask
|
34
33
|
res = []
|
@@ -116,6 +115,7 @@ module Rsec #:nodoc
|
|
116
115
|
end
|
117
116
|
|
118
117
|
# should be end-of-file after parsing
|
118
|
+
# FIXME seems parser keeps a state when using parse!, see nasm manual parse
|
119
119
|
class Eof < Unary
|
120
120
|
def _parse ctx
|
121
121
|
ret = some()._parse ctx
|
data/lib/rsec/utils.rb
CHANGED
data/readme.rdoc
CHANGED
data/test/test_misc.rb
CHANGED
metadata
CHANGED
@@ -1,24 +1,28 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rsec
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
3
|
+
version: !ruby/object:Gem::Version
|
5
4
|
prerelease:
|
5
|
+
version: 0.3.6
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- NS
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
|
12
|
+
|
13
|
+
date: 2011-03-08 00:00:00 +08:00
|
13
14
|
default_executable:
|
14
15
|
dependencies: []
|
16
|
+
|
15
17
|
description: Easy and extreme fast dynamic PEG parser combinator.
|
16
18
|
email:
|
17
19
|
executables: []
|
20
|
+
|
18
21
|
extensions: []
|
19
|
-
|
22
|
+
|
23
|
+
extra_rdoc_files:
|
20
24
|
- readme.rdoc
|
21
|
-
files:
|
25
|
+
files:
|
22
26
|
- license.txt
|
23
27
|
- readme.rdoc
|
24
28
|
- lib/rsec/helpers.rb
|
@@ -33,6 +37,8 @@ files:
|
|
33
37
|
- examples/arithmetic.rb
|
34
38
|
- examples/bnf.rb
|
35
39
|
- examples/c_minus.rb
|
40
|
+
- examples/little_markdown.rb
|
41
|
+
- examples/nasm_manual.rb
|
36
42
|
- examples/scheme.rb
|
37
43
|
- examples/slow_json.rb
|
38
44
|
- examples/s_exp.rb
|
@@ -55,26 +61,30 @@ files:
|
|
55
61
|
has_rdoc: true
|
56
62
|
homepage: http://rsec.heroku.com
|
57
63
|
licenses: []
|
64
|
+
|
58
65
|
post_install_message:
|
59
66
|
rdoc_options: []
|
60
|
-
|
67
|
+
|
68
|
+
require_paths:
|
61
69
|
- lib
|
62
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
71
|
none: false
|
64
|
-
requirements:
|
65
|
-
- -
|
66
|
-
- !ruby/object:Gem::Version
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
67
75
|
version: 1.9.1
|
68
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
69
77
|
none: false
|
70
|
-
requirements:
|
71
|
-
- -
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
version:
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: "0"
|
74
82
|
requirements: []
|
83
|
+
|
75
84
|
rubyforge_project:
|
76
|
-
rubygems_version: 1.
|
85
|
+
rubygems_version: 1.6.1
|
77
86
|
signing_key:
|
78
87
|
specification_version: 3
|
79
88
|
summary: Extreme Fast Parser Combinator for Ruby
|
80
89
|
test_files: []
|
90
|
+
|