rsec 0.3.2 → 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,167 @@
1
+ # a markdown translator
2
+ #
3
+ # The differences between this and original markdown:
4
+ # - markdown inside inline tags is not processed
5
+ # - every line-break in non-tag parts is translated into <br/>
6
+ # - nested list elements are not supported
7
+
8
+ require "rsec"
9
+
10
+ class LittleMarkdown
11
+ include Rsec::Helper
12
+
13
+ def initialize
14
+ @markdown_line_translator = make_markdown_line_translator
15
+ @parser = (make_xml_tag_parser | make_char_parser).star.eof
16
+ end
17
+
18
+ def translate src
19
+ @stack = []
20
+ @charsbuf = ''
21
+ @out = ''
22
+ @parser.parse! src
23
+ flush_chars
24
+ @out
25
+ end
26
+
27
+ def flush_chars
28
+ @out.<< translate_markdown @charsbuf
29
+ @charsbuf = ''
30
+ end
31
+
32
+ def make_char_parser
33
+ # beware of StringScanner's bug, see issues
34
+ (/./.r | /\n/).fail('char'){|c| @charsbuf << c}
35
+ end
36
+
37
+ # make a single-line markdown parser
38
+ def make_markdown_line_translator
39
+ line_text = lazy{line}.map{|tokens|
40
+ tokens.empty? ? Rsec::INVALID : tokens.join # filter out empty
41
+ }
42
+
43
+ title = /"[^"]*"|'[^']*'/.r._?{|(s)|
44
+ s ? "title=#{s}" : ''
45
+ }
46
+ img = seq('!['.r >> /[^\]]+/ << '](', /[^\)"']+/, title, ')'){|(txt, path, title)|
47
+ "<img src='#{path}' #{title}>#{txt}</img>"
48
+ }
49
+ link = seq(('['.r >> /[^\]]+/ << ']('), /[^\)"']+/, title, ')'){|(txt, path, title)|
50
+ "<a href='#{path}' #{title}>#{txt}</a>"
51
+ }
52
+ # NOTE strong should be left of em
53
+ strong = ('**'.r >> line_text << '**').map{|s|
54
+ "<strong>#{s}</strong>"
55
+ }
56
+ em = ('*'.r >> line_text << '*').map{|s|
57
+ "<em>#{s}</em>"
58
+ }
59
+ code = ('`'.r >> /[^`]+/ << '`').map{|s|
60
+ "<code>#{s}</code>"
61
+ }
62
+ escape = '<'.r{'&lt;'} | '&'.r{'&amp;'} | /\\[\!\`\*\[\]]/.r{|s|s[1]}
63
+ text = /[^\!\`\*\[\]]+/
64
+ id = seq_(('['.r >> /[^\]]+/ << ']:'), text){|(id, text)|
65
+ "<span id='#{id}'>#{text}</span>"
66
+ }
67
+ line = (img | link | strong | em | code | escape | id | text).star
68
+ line.eof.map &:join
69
+ end
70
+
71
+ # pseudo xml tag parser, except <br> and <hr> and <script>
72
+ def make_xml_tag_parser
73
+ name = /[\w-]+/ # greedy, no need to worry space between first attr
74
+ value = /"[^"]*"|'[^']*'/
75
+ attr = seq_(name, seq_('=', value)._?)
76
+ attrs = /\s*/.r.join(attr)
77
+
78
+ # use a stack to ensure tag matching
79
+ tag_start = seq('<', name, attrs){|res|
80
+ @stack.push res[1].downcase
81
+ res
82
+ }
83
+ tag_empty_end = '/>'.r{|res|
84
+ @stack.pop
85
+ res
86
+ }
87
+ tag_non_empty_end = seq('>', lazy{content}, '</', name, /\s*\>/){|res|
88
+ if @stack.pop == res[3].downcase
89
+ res
90
+ else
91
+ Rsec::INVALID
92
+ end
93
+ }
94
+ special_tag = /\<[bh]r\s*\>/i.r | seq_('<script', attrs, /\>.*?\<\/script\>/)
95
+ tag = special_tag | seq(tag_start, (tag_empty_end | tag_non_empty_end))
96
+
97
+ # xml content
98
+ comment = /<!--([^-]|-[^-])*-->/
99
+ cdata = /<!\[CDATA\[.*?\]\]>/x
100
+ entity = /&(nbsp|lt|gt|amp|cent|pound|yen|euro|sect|copy|reg|trade|#[a-f0-9]{2,4});/i
101
+ text = /[^<&]+/
102
+ content = (cdata.r | comment | entity | tag | text).star
103
+ tag.fail('tag'){|res|
104
+ if @charsbuf.end_with? "\n"
105
+ flush_chars
106
+ @out << res.join
107
+ else
108
+ @charsbuf << res.join # inline tags
109
+ end
110
+ }
111
+ end
112
+
113
+ # translate markdown
114
+ def translate_markdown str
115
+ lines = str.split("\n").chunk{|line|
116
+ line[/^(\ {4}|\#{1,6}\ |[\+\-\>]\ |)/]
117
+ }.map{|(leading, lines)|
118
+ case leading
119
+ when ' ' # code
120
+ "<pre><code>#{lines.join "\n"}</code></pre>"
121
+ when /\#{1,6}/ # headings
122
+ hn = "h#{leading.strip.size}"
123
+ lines.map! do |line|
124
+ line = line.sub(/\#{1,6}/, '')
125
+ "<#{hn}>#{@markdown_line_translator.parse! line}</#{hn}>"
126
+ end
127
+ lines.join
128
+ when '> ' # block quote
129
+ # TODO nested
130
+ lines.map! do |line|
131
+ @markdown_line_translator.parse! line[2..-1]
132
+ end
133
+ "<blockquote>#{lines.join '<br/>'}</blockquote>"
134
+ when '+ ' # numbered list
135
+ # TODO nested
136
+ lines.map! do |line|
137
+ "<li>#{@markdown_line_translator.parse! line[2..-1]}</li>"
138
+ end
139
+ "<ol>#{lines.join}</ol>"
140
+ when '- ' # unordered list
141
+ # TODO nested
142
+ lines.map! do |line|
143
+ "<li>#{@markdown_line_translator.parse! line[2..-1]}</li>"
144
+ end
145
+ "<ul>#{lines.join}</ul>"
146
+ else
147
+ lines.map! do |line|
148
+ @markdown_line_translator.parse! line
149
+ end
150
+ lines.join "<br/>"
151
+ end
152
+ }
153
+ # append one <br/> for each trailing '\n'
154
+ lines.join('<br/>') << ('<br/>' * str[/\n*\Z/].size)
155
+ end
156
+
157
+ end
158
+
159
+ if __FILE__ == $PROGRAM_NAME
160
+ lm = LittleMarkdown.new
161
+ puts lm.translate <<-MD
162
+ ## *a *
163
+ <pre a="3">123afd</pre>
164
+ ** b **
165
+ MD
166
+ end
167
+
@@ -0,0 +1,136 @@
1
+ # Parse NASM manual [nasm.txt] and generate a list of opcodes.
2
+ # Results are saved in [nasm_codes.txt], undocumented codes are printed.
3
+ # Further: extend the parser to generate an X86 assembler.
4
+ require "rsec"
5
+
6
+ module NASMManualParser
7
+ include Rsec::Helper
8
+ extend self
9
+
10
+ Instructions = {}
11
+
12
+ class UnSupportedError < RuntimeError
13
+ end
14
+
15
+ class Instruction < Struct.new(:nemonic, :operands, :code, :archs)
16
+ end
17
+
18
+ def debug parser, *strs
19
+ return parser unless $debug
20
+ strs.each do |str|
21
+ parser.eof.parse! str
22
+ end
23
+ parser
24
+ end
25
+
26
+ def reg_parser
27
+ gp_reg = /E?[ABCD]X|E?(SP|BP|SI|DI)/
28
+ gp_reg8 = /[ABCD][HL]/
29
+ seg_reg = /ES|CS|SS|DS|FS|GS/
30
+ fpu_reg = /ST[0-7]/
31
+ mmx_reg = /MM[0-7]/
32
+ xr_reg = /CR[0234]|DR[012367]|TR[34567]/
33
+ reg = gp_reg.r | gp_reg8 | seg_reg | fpu_reg | mmx_reg | xr_reg
34
+ debug reg, 'AX'
35
+ end
36
+
37
+ def operands_parser
38
+ imm_class = /imm:imm(32|16)|imm(32|16|8)?/
39
+ mem_class = /mem(80|64|32|16|8)?/ # beware of the order
40
+ reg_class = /reg(32|16|8)|(fpu|mmx|seg)reg/
41
+ memoffs_class = /memoffs(32|16|8)/
42
+ tr_class = 'TR3/4/5/6/7'
43
+ classes = (imm_class.r | memoffs_class | mem_class | reg_class | tr_class).fail 'operand class'
44
+ reg = reg_parser.fail 'register'
45
+ num = /\d/.r(&:to_i).fail 'num'
46
+ # memoffs should be left of mem
47
+ operand = classes | reg | num
48
+ operands = operand.join('/').even.join(',').even
49
+ debug operands, 'reg32', 'AX,memoffs16'
50
+ end
51
+
52
+ def code_parser
53
+ plus_cc = /[0-9A-F][0-9A-F]\+cc/
54
+ plus_r = /[0-9A-F][0-9A-F]\+r/
55
+ hex = /[0-9A-F][0-9A-F]/.r {|s| s.to_i 16}
56
+ slash = /\/[\dr]/
57
+ imm_code = /i[bwd]/
58
+ reg_code = /rw\/rd|r[bwd]/
59
+ ref_code = /ow\/od|o[wd]/
60
+ prefix_code = /[oa](32|16)/
61
+ code =\
62
+ (plus_cc.r | plus_r | hex | slash |
63
+ imm_code | reg_code | ref_code | prefix_code).join(/\s+/).even
64
+ debug code, 'o32 0F C8+r', 'o32 6B /r ib', 'o16 A1 ow/od'
65
+ end
66
+
67
+ def archs_parser
68
+ arch = symbol(/8086|186|286|386|486|PENT|P6|CYRIX|FPU|MMX|PRIV|UNDOC/)
69
+ archs = ('['.r >> arch.join(',').even << ']').map do |archs|
70
+ # map to set
71
+ archs.inject({}){|h, arch|
72
+ raise UnSupportedError, 'not implemented' if arch == 'UNDOC'
73
+ h[arch] = true
74
+ h
75
+ }
76
+ end
77
+ debug archs, '[386,FPU]'
78
+ end
79
+
80
+ def instruction_parser
81
+ nemonic = /[A-Z]\w+|xxSAR/
82
+ operands = operands_parser._?
83
+ code = ';'.r >> code_parser
84
+ archs = archs_parser
85
+ instruction = seq_ nemonic, operands, code, archs do |nemonic, (operands), code, archs|
86
+ Instruction.new nemonic, operands, code, archs
87
+ end
88
+ debug instruction, 'FISUBR mem32 ; DA /5 [8086,FPU]', 'BSWAP reg32 ; o32 0F C8+r [486]'
89
+ end
90
+
91
+ def desugar line
92
+ # r/m shorthands
93
+ line = line.gsub /r\/m(32|16|8)/, 'reg\1/mem\1'
94
+ line.gsub! 'r/m64', 'mmxreg/mem64'
95
+ # compress space
96
+ line.sub! /\s(TO|NEAR|FAR|SHORT)/, '_\1'
97
+ line
98
+ end
99
+
100
+ def parse_line parser, line
101
+ parser.parse! desugar line
102
+ rescue Rsec::SyntaxError
103
+ rescue UnSupportedError
104
+ end
105
+
106
+ def parse filename
107
+ parsed = ''
108
+ parser = instruction_parser.eof
109
+ src = File.read filename
110
+ src.lines.with_index do |raw_line, idx|
111
+ line = raw_line.strip
112
+ # this shape indicates that the line defines a mnemonic
113
+ if line =~ /^\w+\s+[^;\[]+;\ [^;\[]+\[.+\]$/
114
+ if (parse_line parser, line)
115
+ parsed << raw_line
116
+ else
117
+ puts "unparsed:#{idx}\t#{line}"
118
+ end
119
+ end
120
+ end
121
+ parsed
122
+ end
123
+
124
+ end
125
+
126
+ if __FILE__ == $PROGRAM_NAME
127
+ $debug = true
128
+ manual = "#{File.dirname __FILE__}/nasm_manual.txt"
129
+ codes = "#{File.dirname __FILE__}/nasm_codes.txt"
130
+ File.open codes, 'w' do |file|
131
+ file.<< NASMManualParser.parse manual
132
+ end
133
+ puts '-' * 80
134
+ puts "X86 asm codes are saved to #{codes}"
135
+ end
136
+
data/lib/rsec/helpers.rb CHANGED
@@ -212,7 +212,7 @@ module Rsec #:nodoc:
212
212
 
213
213
  # @ desc
214
214
  # Repeat n or in a range.
215
- # If range.end < 0, repeat at least range.begin
215
+ # If range.end &lt; 0, repeat at least range.begin
216
216
  # (Infinity and -Infinity are considered)
217
217
  def * n, &p
218
218
  # FIXME if self is an epsilon parser, will cause infinite loop
@@ -329,8 +329,8 @@ module Rsec #:nodoc:
329
329
  # @ desc
330
330
  # Think about "innerHTML"!
331
331
  # @ example
332
- # parser = seq('<b>', /[\w\s]+/, '</b>').inner
333
- # parser.parse('<b>the inside</b>')
332
+ # parser = seq('&lt;b&gt;', /[\w\s]+/, '&lt;/b&gt;').inner
333
+ # parser.parse('&lt;b&gt;the inside&lt;/b&gt;')
334
334
  def inner &p
335
335
  Inner[self].map p
336
336
  end
@@ -14,8 +14,6 @@ module Rsec #:nodoc
14
14
  class Fail < Binary
15
15
  def Fail.[] left, tokens
16
16
  # TODO mutex
17
- @mask_bit ||= 0
18
- @token_table ||= []
19
17
  if @mask_bit > 1000
20
18
  raise "You've created too many fail parsers, If it is your intention, call Rsec::Fail.reset when previous expect settings can be thrown away."
21
19
  end
@@ -29,6 +27,7 @@ module Rsec #:nodoc
29
27
  @mask_bit = 0
30
28
  @token_table = []
31
29
  end
30
+ Fail.reset
32
31
 
33
32
  def Fail.get_tokens mask
34
33
  res = []
@@ -116,6 +115,7 @@ module Rsec #:nodoc
116
115
  end
117
116
 
118
117
  # should be end-of-file after parsing
118
+ # FIXME seems parser keeps a state when using parse!, see nasm manual parse
119
119
  class Eof < Unary
120
120
  def _parse ctx
121
121
  ret = some()._parse ctx
data/lib/rsec/utils.rb CHANGED
@@ -92,6 +92,7 @@ module Rsec #:nodoc:
92
92
  # the text is 80 at most
93
93
  def line_text pos
94
94
  from = string.rindex "\n", pos
95
+ (from = string.rindex "\n", pos - 1) if from == pos
95
96
  from = from ? from + 1 : 0
96
97
  from = pos - 40 if (from < pos - 40)
97
98
 
data/readme.rdoc CHANGED
@@ -16,7 +16,7 @@ The pure Ruby gem is fast enough (about 10+x faster than treetop generated code)
16
16
 
17
17
  For extreme performance under C Ruby:
18
18
 
19
- gem in rsec-ext
19
+ gem in rsec-ext
20
20
 
21
21
  It is about 30% faster than Haskell Parsec in the benchmark.
22
22
 
data/test/test_misc.rb CHANGED
@@ -20,6 +20,10 @@ class TestMisc < TC
20
20
  p = ''.r.eof
21
21
  asp '', p
22
22
  ase INVALID, p.parse('a')
23
+
24
+ p = seq('a', 'b').eof
25
+ ase INVALID, p.parse('abc')
26
+ ase ['a', 'b'], p.parse('ab')
23
27
  end
24
28
 
25
29
  def test_cache
metadata CHANGED
@@ -1,24 +1,28 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rsec
3
- version: !ruby/object:Gem::Version
4
- version: 0.3.2
3
+ version: !ruby/object:Gem::Version
5
4
  prerelease:
5
+ version: 0.3.6
6
6
  platform: ruby
7
- authors:
7
+ authors:
8
8
  - NS
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-02-24 00:00:00.000000000 +08:00
12
+
13
+ date: 2011-03-08 00:00:00 +08:00
13
14
  default_executable:
14
15
  dependencies: []
16
+
15
17
  description: Easy and extreme fast dynamic PEG parser combinator.
16
18
  email:
17
19
  executables: []
20
+
18
21
  extensions: []
19
- extra_rdoc_files:
22
+
23
+ extra_rdoc_files:
20
24
  - readme.rdoc
21
- files:
25
+ files:
22
26
  - license.txt
23
27
  - readme.rdoc
24
28
  - lib/rsec/helpers.rb
@@ -33,6 +37,8 @@ files:
33
37
  - examples/arithmetic.rb
34
38
  - examples/bnf.rb
35
39
  - examples/c_minus.rb
40
+ - examples/little_markdown.rb
41
+ - examples/nasm_manual.rb
36
42
  - examples/scheme.rb
37
43
  - examples/slow_json.rb
38
44
  - examples/s_exp.rb
@@ -55,26 +61,30 @@ files:
55
61
  has_rdoc: true
56
62
  homepage: http://rsec.heroku.com
57
63
  licenses: []
64
+
58
65
  post_install_message:
59
66
  rdoc_options: []
60
- require_paths:
67
+
68
+ require_paths:
61
69
  - lib
62
- required_ruby_version: !ruby/object:Gem::Requirement
70
+ required_ruby_version: !ruby/object:Gem::Requirement
63
71
  none: false
64
- requirements:
65
- - - ! '>='
66
- - !ruby/object:Gem::Version
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
67
75
  version: 1.9.1
68
- required_rubygems_version: !ruby/object:Gem::Requirement
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
77
  none: false
70
- requirements:
71
- - - ! '>='
72
- - !ruby/object:Gem::Version
73
- version: '0'
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: "0"
74
82
  requirements: []
83
+
75
84
  rubyforge_project:
76
- rubygems_version: 1.5.2
85
+ rubygems_version: 1.6.1
77
86
  signing_key:
78
87
  specification_version: 3
79
88
  summary: Extreme Fast Parser Combinator for Ruby
80
89
  test_files: []
90
+