rsec 0.3.2 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,167 @@
1
+ # a markdown translator
2
+ #
3
+ # The differences between this and original markdown:
4
+ # - markdown in inline tags are not processed
5
+ # - every line-break in non-tag parts is translated into <br/>
6
+ # - nested list elements are not supported
7
+
8
+ require "rsec"
9
+
10
# Translates a small markdown dialect (with embedded pseudo-XML tags) to HTML.
#
# Usage:
#   LittleMarkdown.new.translate(source)  # => HTML string
#
# The top-level parser alternates between whole XML-like tags and single
# characters. Characters are buffered in @charsbuf and converted block-wise by
# #translate_markdown whenever the buffer is flushed.
class LittleMarkdown
  include Rsec::Helper

  # Builds the two parsers once so they can be reused by every #translate call.
  def initialize
    @markdown_line_translator = make_markdown_line_translator
    @parser = (make_xml_tag_parser | make_char_parser).star.eof
  end

  # Translates +src+ and returns the generated HTML string.
  #
  # Per-call state (@stack, @charsbuf, @out) is reset first, so one instance
  # can translate several documents in sequence.
  def translate src
    @stack = []
    @charsbuf = ''
    @out = ''
    @parser.parse! src
    flush_chars # emit whatever text is still buffered after the last tag
    @out
  end

  # Converts the buffered plain text to HTML, appends it to @out and starts a
  # fresh buffer.
  def flush_chars
    @out << translate_markdown(@charsbuf)
    @charsbuf = ''
  end

  # Parser that consumes one character (including a newline) and appends it to
  # the text buffer.
  def make_char_parser
    # care stringscanner's bug, see issues
    (/./.r | /\n/).fail('char'){|c| @charsbuf << c}
  end

  # Builds the parser that translates a single line of inline markdown
  # (images, links, strong, em, code, escapes, id spans, plain text) and joins
  # the translated pieces into one string.
  def make_markdown_line_translator
    # `line` is assigned at the bottom of this method; it is wrapped in a block
    # so that it is not evaluated here.
    # NOTE(review): confirm how Rsec's lazy{} resolves the not-yet-assigned local.
    line_text = lazy{line}.map{|tokens|
      tokens.empty? ? Rsec::INVALID : tokens.join # filter out empty
    }

    # optional quoted title, rendered as a title=... attribute (quotes are kept)
    title = /"[^"]*"|'[^']*'/.r._?{|(s)|
      s ? "title=#{s}" : ''
    }
    img = seq('!['.r >> /[^\]]+/ << '](', /[^\)"']+/, title, ')'){|(txt, path, title_attr)|
      "<img src='#{path}' #{title_attr}>#{txt}</img>"
    }
    link = seq(('['.r >> /[^\]]+/ << ']('), /[^\)"']+/, title, ')'){|(txt, path, title_attr)|
      "<a href='#{path}' #{title_attr}>#{txt}</a>"
    }
    # NOTE strong should be left of em
    strong = ('**'.r >> line_text << '**').map{|s|
      "<strong>#{s}</strong>"
    }
    em = ('*'.r >> line_text << '*').map{|s|
      "<em>#{s}</em>"
    }
    code = ('`'.r >> /[^`]+/ << '`').map{|s|
      "<code>#{s}</code>"
    }
    # '<' and '&' become entities; a backslash-escaped markdown char loses its backslash
    escape = '<'.r{'&lt;'} | '&'.r{'&amp;'} | /\\[\!\`\*\[\]]/.r{|s|s[1]}
    text = /[^\!\`\*\[\]]+/
    id = seq_(('['.r >> /[^\]]+/ << ']:'), text){|(anchor, body)|
      "<span id='#{anchor}'>#{body}</span>"
    }
    line = (img | link | strong | em | code | escape | id | text).star
    line.eof.map &:join
  end

  # pseudo xml tag parser, except <br> and <hr> and <script>
  #
  # A matched tag is written straight to @out when the text buffer ends with a
  # newline (after flushing the buffer); otherwise it is appended to the text
  # buffer as an inline tag.
  def make_xml_tag_parser
    name = /[\w-]+/ # greedy, no need to worry space between first attr
    value = /"[^"]*"|'[^']*'/
    attr = seq_(name, seq_('=', value)._?)
    attrs = /\s*/.r.join(attr)

    # use a stack to ensure tag matching
    tag_start = seq('<', name, attrs){|res|
      @stack.push res[1].downcase
      res
    }
    tag_empty_end = '/>'.r{|res|
      @stack.pop
      res
    }
    tag_non_empty_end = seq('>', lazy{content}, '</', name, /\s*\>/){|res|
      # the closing name must equal the most recently opened one
      if @stack.pop == res[3].downcase
        res
      else
        Rsec::INVALID
      end
    }
    special_tag = /\<[bh]r\s*\>/i.r | seq_('<script', attrs, /\>.*?\<\/script\>/)
    tag = special_tag | seq(tag_start, (tag_empty_end | tag_non_empty_end))

    # xml content
    comment = /<!--([^-]|-[^-])*-->/
    cdata = /<!\[CDATA\[.*?\]\]>/x
    entity = /&(nbsp|lt|gt|amp|cent|pound|yen|euro|sect|copy|reg|trade|#[a-f0-9]{2,4});/i
    text = /[^<&]+/
    content = (cdata.r | comment | entity | tag | text).star
    tag.fail('tag'){|res|
      if @charsbuf.end_with? "\n"
        flush_chars
        @out << res.join
      else
        @charsbuf << res.join # inline tags
      end
    }
  end

  # Translates a chunk of plain markdown text to HTML.
  #
  # Consecutive lines that share the same leading marker are grouped and
  # rendered together:
  #   four spaces -> <pre><code>, "#".."######" + space -> <h1>..<h6>,
  #   "> " -> <blockquote>, "+ " -> <ol>, "- " -> <ul>, anything else -> lines
  #   joined with <br/>.
  # One <br/> is appended for every trailing newline of +str+.
  def translate_markdown str
    blocks = str.split("\n").chunk{|line|
      line[/^(\ {4}|\#{1,6}\ |[\+\-\>]\ |)/]
    }.map{|(leading, group)|
      case leading
      when '    ' # code (must be the four-space key produced by the regex above)
        "<pre><code>#{group.join "\n"}</code></pre>"
      when /\#{1,6}/ # headings
        hn = "h#{leading.strip.size}"
        group.map { |line|
          line = line.sub(/\#{1,6}/, '')
          "<#{hn}>#{@markdown_line_translator.parse! line}</#{hn}>"
        }.join
      when '> ' # block quote
        # TODO nested
        quoted = group.map { |line| @markdown_line_translator.parse! line[2..-1] }
        "<blockquote>#{quoted.join '<br/>'}</blockquote>"
      when '+ ' # numbered list
        # TODO nested
        items = group.map { |line| "<li>#{@markdown_line_translator.parse! line[2..-1]}</li>" }
        "<ol>#{items.join}</ol>"
      when '- ' # unordered list
        # TODO nested
        items = group.map { |line| "<li>#{@markdown_line_translator.parse! line[2..-1]}</li>" }
        "<ul>#{items.join}</ul>"
      else
        group.map { |line| @markdown_line_translator.parse! line }.join "<br/>"
      end
    }
    # add trailing '\n' s
    blocks.join('<br/>') << ('<br/>' * str[/\n*\Z/].size)
  end

end
158
+
159
# Demo: translate a small sample document when this file is run directly.
if __FILE__ == $PROGRAM_NAME
  sample = <<-MD
## *a *
<pre a="3">123afd</pre>
** b **
  MD
  puts LittleMarkdown.new.translate(sample)
end
167
+
@@ -0,0 +1,136 @@
1
+ # Parse NASM manual [nasm_manual.txt] and generate a list of opcodes.
2
+ # Results are saved in [nasm_codes.txt], undocumented codes are printed.
3
+ # Further: extend the parser to generate an X86 assembler.
4
+ require "rsec"
5
+
6
# Parses instruction definition lines of the NASM manual, e.g.
#   BSWAP reg32 ; o32 0F C8+r [486]
# into Instruction structs (mnemonic, operands, opcode bytes, architectures).
#
# All methods are callable on the module itself (extend self).
module NASMManualParser
  include Rsec::Helper
  extend self

  Instructions = {}

  # Raised for definitions this parser deliberately does not handle
  # (currently: the UNDOC architecture flag).
  class UnSupportedError < RuntimeError
  end

  # One parsed instruction definition.
  class Instruction < Struct.new(:nemonic, :operands, :code, :archs)
  end

  # Self-check helper: when the global $debug is set, runs +parser+ (with an
  # end-of-input requirement) against every sample in +strs+ via parse!.
  # Always returns +parser+ itself so it can wrap a builder's result.
  def debug parser, *strs
    return parser unless $debug
    strs.each { |sample| parser.eof.parse! sample }
    parser
  end

  # Parser for a concrete register name (general purpose, 8-bit, segment,
  # FPU, MMX, control/debug/test registers).
  def reg_parser
    gp_reg = /E?[ABCD]X|E?(SP|BP|SI|DI)/
    gp_reg8 = /[ABCD][HL]/
    seg_reg = /ES|CS|SS|DS|FS|GS/
    fpu_reg = /ST[0-7]/
    mmx_reg = /MM[0-7]/
    xr_reg = /CR[0234]|DR[012367]|TR[34567]/
    reg = gp_reg.r | gp_reg8 | seg_reg | fpu_reg | mmx_reg | xr_reg
    debug reg, 'AX'
  end

  # Parser for an operand list: operands separated by ',', each operand being
  # one or more '/'-separated alternatives (operand class, register or digit).
  def operands_parser
    imm_class = /imm:imm(32|16)|imm(32|16|8)?/
    mem_class = /mem(80|64|32|16|8)?/ # beware of the order
    reg_class = /reg(32|16|8)|(fpu|mmx|seg)reg/
    memoffs_class = /memoffs(32|16|8)/
    tr_class = 'TR3/4/5/6/7'
    # memoffs should be left of mem
    classes = (imm_class.r | memoffs_class | mem_class | reg_class | tr_class).fail 'operand class'
    reg = reg_parser.fail 'register'
    num = /\d/.r(&:to_i).fail 'num'
    operand = classes | reg | num
    operands = operand.join('/').even.join(',').even
    debug operands, 'reg32', 'AX,memoffs16'
  end

  # Parser for the opcode column: whitespace-separated tokens such as hex
  # bytes (converted to integers), "XX+r", "XX+cc", "/r", "ib", "o32", ...
  def code_parser
    plus_cc = /[0-9A-F][0-9A-F]\+cc/
    plus_r = /[0-9A-F][0-9A-F]\+r/
    hex = /[0-9A-F][0-9A-F]/.r {|s| s.to_i 16}
    slash = /\/[\dr]/
    imm_code = /i[bwd]/
    reg_code = /rw\/rd|r[bwd]/
    ref_code = /ow\/od|o[wd]/
    prefix_code = /[oa](32|16)/
    code = (plus_cc.r | plus_r | hex | slash |
            imm_code | reg_code | ref_code | prefix_code).join(/\s+/).even
    debug code, 'o32 0F C8+r', 'o32 6B /r ib', 'o16 A1 ow/od'
  end

  # Parser for the bracketed architecture list, e.g. "[386,FPU]"; the result
  # is a Hash used as a set (name => true).
  #
  # The mapping block raises UnSupportedError when the list contains UNDOC.
  def archs_parser
    arch = symbol(/8086|186|286|386|486|PENT|P6|CYRIX|FPU|MMX|PRIV|UNDOC/)
    archs = ('['.r >> arch.join(',').even << ']').map do |names|
      # map to set
      names.each_with_object({}) do |name, set|
        raise UnSupportedError, 'not implemented' if name == 'UNDOC'
        set[name] = true
      end
    end
    debug archs, '[386,FPU]'
  end

  # Parser for a whole definition line:
  #   MNEMONIC [operands] ; code [archs]
  # yielding an Instruction.
  def instruction_parser
    nemonic = /[A-Z]\w+|xxSAR/
    operands = operands_parser._?
    code = ';'.r >> code_parser
    archs = archs_parser
    # `(operand_list)` unwraps the array produced by the optional operands parser
    instruction = seq_(nemonic, operands, code, archs) do |name, (operand_list), opcode, arch_set|
      Instruction.new name, operand_list, opcode, arch_set
    end
    debug instruction, 'FISUBR mem32 ; DA /5 [8086,FPU]', 'BSWAP reg32 ; o32 0F C8+r [486]'
  end

  # Returns a copy of +line+ with the manual's shorthands expanded so the
  # grammar above can parse it; the argument itself is not modified.
  def desugar line
    # r/m short hands
    line = line.gsub(/r\/m(32|16|8)/, 'reg\1/mem\1')
    line.gsub!('r/m64', 'mmxreg/mem64')
    # glue the first TO/NEAR/FAR/SHORT keyword to the preceding token with '_'
    line.sub!(/\s(TO|NEAR|FAR|SHORT)/, '_\1')
    line
  end

  # Parses one definition line with +parser+.
  #
  # Returns the parse result, or nil when the line has a syntax error or uses
  # an unsupported feature (both are treated as "could not parse").
  def parse_line parser, line
    parser.parse! desugar(line)
  rescue Rsec::SyntaxError, UnSupportedError
    nil
  end

  # Reads +filename+ and returns a String made of every raw line that was
  # successfully parsed as an instruction definition. Lines that look like a
  # definition but fail to parse are reported on stdout with their zero-based
  # line index.
  def parse filename
    parsed = ''
    parser = instruction_parser.eof
    # each_line (not lines) so that with_index is available on every Ruby
    # version: String#lines returns an Array on Ruby >= 2.0.
    File.read(filename).each_line.with_index do |raw_line, idx|
      line = raw_line.strip
      # this shape shows the line is something defining a mnemonic
      next unless line =~ /^\w+\s+[^;\[]+;\ [^;\[]+\[.+\]$/

      if parse_line(parser, line)
        parsed << raw_line
      else
        puts "unparsed:#{idx}\t#{line}"
      end
    end
    parsed
  end

end
125
+
126
# Script entry point: parse nasm_manual.txt next to this file and write the
# recognised definition lines to nasm_codes.txt in the same directory.
if __FILE__ == $PROGRAM_NAME
  $debug = true # enables the sample-input self checks in NASMManualParser#debug
  here = File.dirname __FILE__
  manual = "#{here}/nasm_manual.txt"
  codes = "#{here}/nasm_codes.txt"
  File.open(codes, 'w') { |file| file << NASMManualParser.parse(manual) }
  puts '-' * 80
  puts "X86 asm codes are saved to #{codes}"
end
136
+
data/lib/rsec/helpers.rb CHANGED
@@ -212,7 +212,7 @@ module Rsec #:nodoc:
212
212
 
213
213
  # @ desc
214
214
  # Repeat n or in a range.
215
- # If range.end < 0, repeat at least range.begin
215
+ # If range.end &lt; 0, repeat at least range.begin
216
216
  # (Infinity and -Infinity are considered)
217
217
  def * n, &p
218
218
  # FIXME if self is an epsilon parser, will cause infinite loop
@@ -329,8 +329,8 @@ module Rsec #:nodoc:
329
329
  # @ desc
330
330
  # Think about "innerHTML"!
331
331
  # @ example
332
- # parser = seq('<b>', /[\w\s]+/, '</b>').inner
333
- # parser.parse('<b>the inside</b>')
332
+ # parser = seq('&lt;b&gt;', /[\w\s]+/, '&lt;/b&gt;').inner
333
+ # parser.parse('&lt;b&gt;the inside&lt;/b&gt;')
334
334
  def inner &p
335
335
  Inner[self].map p
336
336
  end
@@ -14,8 +14,6 @@ module Rsec #:nodoc
14
14
  class Fail < Binary
15
15
  def Fail.[] left, tokens
16
16
  # TODO mutex
17
- @mask_bit ||= 0
18
- @token_table ||= []
19
17
  if @mask_bit > 1000
20
18
  raise "You've created too many fail parsers, If it is your intention, call Rsec::Fail.reset when previous expect settings can be thrown away."
21
19
  end
@@ -29,6 +27,7 @@ module Rsec #:nodoc
29
27
  @mask_bit = 0
30
28
  @token_table = []
31
29
  end
30
+ Fail.reset
32
31
 
33
32
  def Fail.get_tokens mask
34
33
  res = []
@@ -116,6 +115,7 @@ module Rsec #:nodoc
116
115
  end
117
116
 
118
117
  # should be end-of-file after parsing
118
+ # FIXME seems parser keeps a state when using parse!, see nasm manual parse
119
119
  class Eof < Unary
120
120
  def _parse ctx
121
121
  ret = some()._parse ctx
data/lib/rsec/utils.rb CHANGED
@@ -92,6 +92,7 @@ module Rsec #:nodoc:
92
92
  # the text is 80 at most
93
93
  def line_text pos
94
94
  from = string.rindex "\n", pos
95
+ (from = string.rindex "\n", pos - 1) if from == pos
95
96
  from = from ? from + 1 : 0
96
97
  from = pos - 40 if (from < pos - 40)
97
98
 
data/readme.rdoc CHANGED
@@ -16,7 +16,7 @@ The pure Ruby gem is fast enough (about 10+x faster than treetop generated code)
16
16
 
17
17
  For extreme performance under C Ruby:
18
18
 
19
- gem in rsec-ext
19
+ gem in rsec-ext
20
20
 
21
21
  It is about 30% faster than Haskell Parsec in the benchmark.
22
22
 
data/test/test_misc.rb CHANGED
@@ -20,6 +20,10 @@ class TestMisc < TC
20
20
  p = ''.r.eof
21
21
  asp '', p
22
22
  ase INVALID, p.parse('a')
23
+
24
+ p = seq('a', 'b').eof
25
+ ase INVALID, p.parse('abc')
26
+ ase ['a', 'b'], p.parse('ab')
23
27
  end
24
28
 
25
29
  def test_cache
metadata CHANGED
@@ -1,24 +1,28 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rsec
3
- version: !ruby/object:Gem::Version
4
- version: 0.3.2
3
+ version: !ruby/object:Gem::Version
5
4
  prerelease:
5
+ version: 0.3.6
6
6
  platform: ruby
7
- authors:
7
+ authors:
8
8
  - NS
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-02-24 00:00:00.000000000 +08:00
12
+
13
+ date: 2011-03-08 00:00:00 +08:00
13
14
  default_executable:
14
15
  dependencies: []
16
+
15
17
  description: Easy and extreme fast dynamic PEG parser combinator.
16
18
  email:
17
19
  executables: []
20
+
18
21
  extensions: []
19
- extra_rdoc_files:
22
+
23
+ extra_rdoc_files:
20
24
  - readme.rdoc
21
- files:
25
+ files:
22
26
  - license.txt
23
27
  - readme.rdoc
24
28
  - lib/rsec/helpers.rb
@@ -33,6 +37,8 @@ files:
33
37
  - examples/arithmetic.rb
34
38
  - examples/bnf.rb
35
39
  - examples/c_minus.rb
40
+ - examples/little_markdown.rb
41
+ - examples/nasm_manual.rb
36
42
  - examples/scheme.rb
37
43
  - examples/slow_json.rb
38
44
  - examples/s_exp.rb
@@ -55,26 +61,30 @@ files:
55
61
  has_rdoc: true
56
62
  homepage: http://rsec.heroku.com
57
63
  licenses: []
64
+
58
65
  post_install_message:
59
66
  rdoc_options: []
60
- require_paths:
67
+
68
+ require_paths:
61
69
  - lib
62
- required_ruby_version: !ruby/object:Gem::Requirement
70
+ required_ruby_version: !ruby/object:Gem::Requirement
63
71
  none: false
64
- requirements:
65
- - - ! '>='
66
- - !ruby/object:Gem::Version
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
67
75
  version: 1.9.1
68
- required_rubygems_version: !ruby/object:Gem::Requirement
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
77
  none: false
70
- requirements:
71
- - - ! '>='
72
- - !ruby/object:Gem::Version
73
- version: '0'
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: "0"
74
82
  requirements: []
83
+
75
84
  rubyforge_project:
76
- rubygems_version: 1.5.2
85
+ rubygems_version: 1.6.1
77
86
  signing_key:
78
87
  specification_version: 3
79
88
  summary: Extreme Fast Parser Combinator for Ruby
80
89
  test_files: []
90
+