textpow 0.10.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (193) hide show
  1. data/.travis.yml +7 -0
  2. data/Gemfile +10 -0
  3. data/Gemfile.lock +30 -0
  4. data/{History.txt → History.rdoc} +10 -0
  5. data/{README.txt → MIT-LICENSE.txt} +0 -30
  6. data/README.rdoc +82 -0
  7. data/Rakefile +42 -49
  8. data/bin/plist2syntax +0 -0
  9. data/bin/plist2yaml +0 -0
  10. data/examples/benchmark_js.rb +13 -0
  11. data/examples/jquery.js +9046 -0
  12. data/lib/textpow.rb +43 -3
  13. data/lib/textpow/debug_processor.rb +35 -35
  14. data/lib/textpow/recording_processor.rb +13 -0
  15. data/lib/textpow/score_manager.rb +60 -60
  16. data/lib/textpow/syntax.rb +303 -262
  17. data/lib/textpow/syntax/broken/markdown.syntax +519 -0
  18. data/lib/textpow/syntax/broken/php.syntax +1253 -0
  19. data/lib/textpow/syntax/buggy/nemerle.syntax +74 -0
  20. data/lib/textpow/syntax/old/YAML.yaml +160 -0
  21. data/lib/textpow/syntax/old/txt2tags.yaml +139 -0
  22. data/lib/textpow/syntax/source.actionscript.syntax +97 -0
  23. data/lib/textpow/syntax/source.active4d.library.syntax +21 -0
  24. data/lib/textpow/syntax/source.active4d.syntax +276 -0
  25. data/lib/textpow/syntax/source.ada.syntax +33 -0
  26. data/lib/textpow/syntax/source.antlr.syntax +151 -0
  27. data/lib/textpow/syntax/source.apache-config.mod_perl.syntax +50 -0
  28. data/lib/textpow/syntax/source.apache-config.syntax +191 -0
  29. data/lib/textpow/syntax/source.applescript.syntax +384 -0
  30. data/lib/textpow/syntax/source.asp.syntax +70 -0
  31. data/lib/textpow/syntax/source.asp.vb.net.syntax +129 -0
  32. data/lib/textpow/syntax/source.c++.qt.syntax +26 -0
  33. data/lib/textpow/syntax/source.c++.syntax +186 -0
  34. data/lib/textpow/syntax/source.c-sharp.syntax +59 -0
  35. data/lib/textpow/syntax/source.c.ragel.syntax +201 -0
  36. data/lib/textpow/syntax/source.c.syntax +414 -0
  37. data/lib/textpow/syntax/source.camlp4.ocaml.syntax +36 -0
  38. data/lib/textpow/syntax/source.cm.syntax +32 -0
  39. data/lib/textpow/syntax/source.coffee.syntax +216 -0
  40. data/lib/textpow/syntax/source.context-free.syntax +176 -0
  41. data/lib/textpow/syntax/source.css.beta.syntax +1925 -0
  42. data/lib/textpow/syntax/source.css.syntax +195 -0
  43. data/lib/textpow/syntax/source.d.syntax +142 -0
  44. data/lib/textpow/syntax/source.diff.syntax +81 -0
  45. data/lib/textpow/syntax/source.dot.syntax +47 -0
  46. data/lib/textpow/syntax/source.dylan.syntax +62 -0
  47. data/lib/textpow/syntax/source.eiffel.syntax +78 -0
  48. data/lib/textpow/syntax/source.erlang.syntax +922 -0
  49. data/lib/textpow/syntax/source.fortran.syntax +141 -0
  50. data/lib/textpow/syntax/source.fscript.syntax +80 -0
  51. data/lib/textpow/syntax/source.fxscript.syntax +142 -0
  52. data/lib/textpow/syntax/source.gri.syntax +83 -0
  53. data/lib/textpow/syntax/source.groovy.groovy.syntax +191 -0
  54. data/lib/textpow/syntax/source.haskell.syntax +88 -0
  55. data/lib/textpow/syntax/source.icalendar.syntax +32 -0
  56. data/lib/textpow/syntax/source.inform.syntax +48 -0
  57. data/lib/textpow/syntax/source.ini.syntax +55 -0
  58. data/lib/textpow/syntax/source.io.syntax +81 -0
  59. data/lib/textpow/syntax/source.java-props.syntax +20 -0
  60. data/lib/textpow/syntax/source.java.syntax +211 -0
  61. data/lib/textpow/syntax/source.js.greasemonkey.syntax +34 -0
  62. data/lib/textpow/syntax/source.js.jquery.syntax +114 -0
  63. data/lib/textpow/syntax/source.js.mootools.syntax +572 -0
  64. data/lib/textpow/syntax/source.js.prototype.bracketed.syntax +140 -0
  65. data/lib/textpow/syntax/source.js.prototype.syntax +72 -0
  66. data/lib/textpow/syntax/source.js.syntax +256 -0
  67. data/lib/textpow/syntax/source.js.yui.syntax +176 -0
  68. data/lib/textpow/syntax/source.json.syntax +136 -0
  69. data/lib/textpow/syntax/source.lex.syntax +219 -0
  70. data/lib/textpow/syntax/source.lighttpd-config.syntax +54 -0
  71. data/lib/textpow/syntax/source.lilypond.syntax +492 -0
  72. data/lib/textpow/syntax/source.lisp.syntax +61 -0
  73. data/lib/textpow/syntax/source.logo.syntax +29 -0
  74. data/lib/textpow/syntax/source.logtalk.syntax +152 -0
  75. data/lib/textpow/syntax/source.lua.syntax +86 -0
  76. data/lib/textpow/syntax/source.makefile.syntax +36 -0
  77. data/lib/textpow/syntax/source.matlab.syntax +142 -0
  78. data/lib/textpow/syntax/source.mel.syntax +92 -0
  79. data/lib/textpow/syntax/source.mips.syntax +66 -0
  80. data/lib/textpow/syntax/source.ml.syntax +121 -0
  81. data/lib/textpow/syntax/source.modula-3.syntax +47 -0
  82. data/lib/textpow/syntax/source.nant-build.syntax +53 -0
  83. data/lib/textpow/syntax/source.objc++.syntax +18 -0
  84. data/lib/textpow/syntax/source.objc.syntax +233 -0
  85. data/lib/textpow/syntax/source.ocaml.syntax +764 -0
  86. data/lib/textpow/syntax/source.ocamllex.syntax +167 -0
  87. data/lib/textpow/syntax/source.ocamlyacc.syntax +184 -0
  88. data/lib/textpow/syntax/source.open-gl.syntax +14 -0
  89. data/lib/textpow/syntax/source.pascal.syntax +77 -0
  90. data/lib/textpow/syntax/source.pascal.vectorscript.syntax +57 -0
  91. data/lib/textpow/syntax/source.perl.syntax +1113 -0
  92. data/lib/textpow/syntax/source.php.cake.syntax +55 -0
  93. data/lib/textpow/syntax/source.plist.tm-grammar.syntax +708 -0
  94. data/lib/textpow/syntax/source.postscript.syntax +114 -0
  95. data/lib/textpow/syntax/source.processing.syntax +106 -0
  96. data/lib/textpow/syntax/source.prolog.syntax +40 -0
  97. data/lib/textpow/syntax/source.python.django.syntax +21 -0
  98. data/lib/textpow/syntax/source.python.syntax +868 -0
  99. data/lib/textpow/syntax/source.qmake.syntax +114 -0
  100. data/lib/textpow/syntax/source.quake-config.syntax +32 -0
  101. data/lib/textpow/syntax/source.r-console.syntax +16 -0
  102. data/lib/textpow/syntax/source.r.syntax +81 -0
  103. data/lib/textpow/syntax/source.regexp.oniguruma.syntax +107 -0
  104. data/lib/textpow/syntax/source.regexp.python.syntax +109 -0
  105. data/lib/textpow/syntax/source.regexp.syntax +50 -0
  106. data/lib/textpow/syntax/source.remind.syntax +253 -0
  107. data/lib/textpow/syntax/source.rez.syntax +80 -0
  108. data/lib/textpow/syntax/source.ruby.experimental.syntax +145 -0
  109. data/lib/textpow/syntax/source.ruby.rails.syntax +88 -0
  110. data/lib/textpow/syntax/source.ruby.syntax +1035 -0
  111. data/lib/textpow/syntax/source.s5.syntax +69 -0
  112. data/lib/textpow/syntax/source.sass.syntax +45 -0
  113. data/lib/textpow/syntax/source.scheme.syntax +347 -0
  114. data/lib/textpow/syntax/source.scilab.syntax +41 -0
  115. data/lib/textpow/syntax/source.scss.syntax +527 -0
  116. data/lib/textpow/syntax/source.shell.syntax +384 -0
  117. data/lib/textpow/syntax/source.slate.syntax +149 -0
  118. data/lib/textpow/syntax/source.smarty.syntax +63 -0
  119. data/lib/textpow/syntax/source.sql.ruby.syntax +18 -0
  120. data/lib/textpow/syntax/source.sql.syntax +237 -0
  121. data/lib/textpow/syntax/source.ssh-config.syntax +33 -0
  122. data/lib/textpow/syntax/source.strings.syntax +39 -0
  123. data/lib/textpow/syntax/source.swig.syntax +57 -0
  124. data/lib/textpow/syntax/source.tcl.macports.syntax +163 -0
  125. data/lib/textpow/syntax/source.tcl.syntax +152 -0
  126. data/lib/textpow/syntax/source.yaml.syntax +160 -0
  127. data/lib/textpow/syntax/text.active4d-ini.syntax +50 -0
  128. data/lib/textpow/syntax/text.bbcode.syntax +287 -0
  129. data/lib/textpow/syntax/text.bibtex.syntax +151 -0
  130. data/lib/textpow/syntax/text.blog.html.syntax +41 -0
  131. data/lib/textpow/syntax/text.blog.markdown.syntax +42 -0
  132. data/lib/textpow/syntax/text.blog.syntax +27 -0
  133. data/lib/textpow/syntax/text.blog.textile.syntax +27 -0
  134. data/lib/textpow/syntax/text.gtdalt.syntax +143 -0
  135. data/lib/textpow/syntax/text.haml.syntax +88 -0
  136. data/lib/textpow/syntax/text.html.asp.net.syntax +424 -0
  137. data/lib/textpow/syntax/text.html.asp.syntax +27 -0
  138. data/lib/textpow/syntax/text.html.basic.syntax +362 -0
  139. data/lib/textpow/syntax/text.html.cfm.syntax +119 -0
  140. data/lib/textpow/syntax/text.html.django.syntax +36 -0
  141. data/lib/textpow/syntax/text.html.dokuwiki.syntax +204 -0
  142. data/lib/textpow/syntax/text.html.doxygen.syntax +43 -0
  143. data/lib/textpow/syntax/text.html.markdown.multimarkdown.syntax +39 -0
  144. data/lib/textpow/syntax/text.html.mason.syntax +119 -0
  145. data/lib/textpow/syntax/text.html.mediawiki.syntax +567 -0
  146. data/lib/textpow/syntax/text.html.mt.syntax +162 -0
  147. data/lib/textpow/syntax/text.html.ruby.syntax +40 -0
  148. data/lib/textpow/syntax/text.html.strict.active4d.syntax +311 -0
  149. data/lib/textpow/syntax/text.html.tcl.syntax +26 -0
  150. data/lib/textpow/syntax/text.html.textile.syntax +215 -0
  151. data/lib/textpow/syntax/text.html.tt.syntax +121 -0
  152. data/lib/textpow/syntax/text.html.twiki.syntax +241 -0
  153. data/lib/textpow/syntax/text.html.xhtml.1-strict.syntax +4027 -0
  154. data/lib/textpow/syntax/text.log.latex.syntax +50 -0
  155. data/lib/textpow/syntax/text.mail.markdown.syntax +118 -0
  156. data/lib/textpow/syntax/text.man.syntax +17 -0
  157. data/lib/textpow/syntax/text.moinmoin.syntax +189 -0
  158. data/lib/textpow/syntax/text.plain.gtd.syntax +22 -0
  159. data/lib/textpow/syntax/text.plain.release-notes.syntax +46 -0
  160. data/lib/textpow/syntax/text.plain.syntax +32 -0
  161. data/lib/textpow/syntax/text.plist.syntax +635 -0
  162. data/lib/textpow/syntax/text.pmwiki.syntax +113 -0
  163. data/lib/textpow/syntax/text.restructuredtext.syntax +250 -0
  164. data/lib/textpow/syntax/text.setext.syntax +147 -0
  165. data/lib/textpow/syntax/text.subversion-commit.syntax +36 -0
  166. data/lib/textpow/syntax/text.tabular.csv.syntax +68 -0
  167. data/lib/textpow/syntax/text.tabular.tsv.syntax +50 -0
  168. data/lib/textpow/syntax/text.tex.latex.beamer.syntax +41 -0
  169. data/lib/textpow/syntax/text.tex.latex.haskell.syntax +24 -0
  170. data/lib/textpow/syntax/text.tex.latex.memoir.syntax +64 -0
  171. data/lib/textpow/syntax/text.tex.latex.rd.syntax +91 -0
  172. data/lib/textpow/syntax/text.tex.latex.sweave.syntax +84 -0
  173. data/lib/textpow/syntax/text.tex.latex.syntax +566 -0
  174. data/lib/textpow/syntax/text.tex.math.syntax +49 -0
  175. data/lib/textpow/syntax/text.tex.syntax +86 -0
  176. data/lib/textpow/syntax/text.txt2tags.syntax +79 -0
  177. data/lib/textpow/syntax/text.xml.apple-dist.syntax +77 -0
  178. data/lib/textpow/syntax/text.xml.strict.syntax +92 -0
  179. data/lib/textpow/syntax/text.xml.syntax +180 -0
  180. data/lib/textpow/syntax/text.xml.xsl.syntax +60 -0
  181. data/lib/textpow/version.rb +3 -0
  182. data/spec/fixtures/objeck.plist +107 -0
  183. data/spec/fixtures/utf8.txt +1 -0
  184. data/spec/spec_helper.rb +2 -0
  185. data/spec/textpow/score_manager_spec.rb +20 -0
  186. data/spec/textpow/syntax_files_spec.rb +26 -0
  187. data/spec/textpow/syntax_spec.rb +225 -0
  188. data/spec/textpow_spec.rb +57 -0
  189. data/textpow.gemspec +19 -0
  190. metadata +246 -68
  191. data/Manifest.txt +0 -13
  192. data/mm/manual.mm +0 -266
  193. data/test/test_textpow.rb +0 -25
@@ -1,10 +1,50 @@
1
1
  require 'yaml'
2
- require 'oniguruma'
3
2
  require 'textpow/syntax'
4
3
  require 'textpow/debug_processor'
4
+ require 'textpow/recording_processor'
5
5
  require 'textpow/score_manager'
6
-
6
+ require 'textpow/version'
7
7
 
8
8
  module Textpow
9
- class ParsingError < Exception; end
9
+ class ParsingError < Exception; end
10
+
11
+ def self.syntax_path
12
+ File.join(File.dirname(__FILE__), 'textpow', 'syntax')
13
+ end
14
+
15
+ @@syntax = {}
16
+ def self.syntax(syntax_name)
17
+ syntax_name = syntax_name.downcase
18
+ if @@syntax.has_key?(syntax_name)
19
+ @@syntax[syntax_name]
20
+ else
21
+ @@syntax[syntax_name] = uncached_syntax(syntax_name)
22
+ end
23
+ end
24
+
25
+ private
26
+
27
+ def self.uncached_syntax(name)
28
+ path = (
29
+ find_syntax_by_path(name) ||
30
+ find_syntax_by_scope_name(name) ||
31
+ find_syntax_by_fuzzy_name(name)
32
+ )
33
+ SyntaxNode.load(path) if path
34
+ end
35
+
36
+ def self.find_syntax_by_scope_name(name)
37
+ path = File.join(syntax_path, "#{name}.syntax")
38
+ path if File.exist?(path)
39
+ end
40
+
41
+ def self.find_syntax_by_fuzzy_name(name)
42
+ path = Dir.glob(File.join(syntax_path, "*.#{name}.*")).sort_by(&:size).first
43
+ path if path and File.exist?(path)
44
+ end
45
+
46
+ def self.find_syntax_by_path(path)
47
+ path if File.file?(path)
48
+ end
10
49
  end
50
+
@@ -1,36 +1,36 @@
1
- module Textpow
2
- class DebugProcessor
3
- def initialize
4
- @line_number = 0
5
- @printable_line = ""
6
- end
7
-
8
- def pprint line, string, position = 0
9
- line.replace line.ljust( position + string.size, " ")
10
- line[position,string.size] = string
11
- line
12
- end
13
-
14
- def open_tag name, position
15
- STDERR.puts pprint( "", "{#{name}", position + @line_marks.size)
16
- end
17
-
18
- def close_tag name, position
19
- STDERR.puts pprint( "", "}#{name}", position + @line_marks.size)
20
- end
21
-
22
- def new_line line
23
- @line_number += 1
24
- @line_marks = "[#{@line_number.to_s.rjust( 4, '0' )}] "
25
- STDERR.puts "#{@line_marks}#{line}"
26
- end
27
-
28
- def start_parsing name
29
- STDERR.puts "{#{name}"
30
- end
31
-
32
- def end_parsing name
33
- STDERR.puts "}#{name}"
34
- end
35
- end
1
+ module Textpow
2
+ class DebugProcessor
3
+ def initialize
4
+ @line_number = 0
5
+ @printable_line = ""
6
+ end
7
+
8
+ def pprint line, string, position = 0
9
+ line.replace line.ljust( position + string.size, " ")
10
+ line[position,string.size] = string
11
+ line
12
+ end
13
+
14
+ def open_tag name, position
15
+ STDERR.puts pprint( "", "{#{name}", position + @line_marks.size)
16
+ end
17
+
18
+ def close_tag name, position
19
+ STDERR.puts pprint( "", "}#{name}", position + @line_marks.size)
20
+ end
21
+
22
+ def new_line line
23
+ @line_number += 1
24
+ @line_marks = "[#{@line_number.to_s.rjust( 4, '0' )}] "
25
+ STDERR.puts "#{@line_marks}#{line}"
26
+ end
27
+
28
+ def start_parsing name
29
+ STDERR.puts "{#{name}"
30
+ end
31
+
32
+ def end_parsing name
33
+ STDERR.puts "}#{name}"
34
+ end
35
+ end
36
36
  end
@@ -0,0 +1,13 @@
1
+ module Textpow
2
+ class RecordingProcessor
3
+ attr_accessor :stack
4
+
5
+ def initialize
6
+ @stack = []
7
+ end
8
+
9
+ def method_missing(name, *args)
10
+ @stack << [name, *args]
11
+ end
12
+ end
13
+ end
@@ -1,65 +1,65 @@
1
1
  module Textpow
2
- class ScoreManager
3
- POINT_DEPTH = 4
4
- NESTING_DEPTH = 40
5
- START_VALUE = 2 ** ( POINT_DEPTH * NESTING_DEPTH )
6
- BASE = 2 ** POINT_DEPTH
7
-
8
- def initialize
9
- @scores = {}
10
- end
11
-
12
- def score search_scope, reference_scope
13
- max = 0
14
- search_scope.split( ',' ).each do |scope|
15
- arrays = scope.split(/\B-/)
16
- if arrays.size == 1
17
- max = [max, score_term( arrays[0], reference_scope )].max
18
- elsif arrays.size > 1
19
- excluded = false
20
- arrays[1..-1].each do |a|
21
- if score_term( arrays[1], reference_scope ) > 0
22
- excluded = true
23
- break
24
- end
25
- end
26
- max = [max, score_term( arrays[0], reference_scope )].max unless excluded
27
- else
28
- raise ParsingError, "Error in scope string: '#{search_scope}' #{arrays.size} is not a valid number of operands" if arrays.size < 1
2
+ class ScoreManager
3
+ POINT_DEPTH = 4
4
+ NESTING_DEPTH = 40
5
+ START_VALUE = 2 ** ( POINT_DEPTH * NESTING_DEPTH )
6
+ BASE = 2 ** POINT_DEPTH
7
+
8
+ def initialize
9
+ @scores = {}
10
+ end
11
+
12
+ def score search_scope, reference_scope
13
+ max = 0
14
+ search_scope.split( ',' ).each do |scope|
15
+ arrays = scope.split(/\B-/)
16
+ if arrays.size == 1
17
+ max = [max, score_term( arrays[0], reference_scope )].max
18
+ elsif arrays.size > 1
19
+ excluded = false
20
+ arrays[1..-1].each do |a|
21
+ if score_term( arrays[1], reference_scope ) > 0
22
+ excluded = true
23
+ break
29
24
  end
30
- end
31
- max
32
- end
33
-
34
- private
35
-
36
- def score_term search_scope, reference_scope
37
- unless @scores[reference_scope] && @scores[reference_scope][search_scope]
38
- @scores[reference_scope] ||= {}
39
- @scores[reference_scope][search_scope] = score_array( search_scope.split(' '), reference_scope.split( ' ' ) )
40
- end
41
- @scores[reference_scope][search_scope]
25
+ end
26
+ max = [max, score_term( arrays[0], reference_scope )].max unless excluded
27
+ else
28
+ raise ParsingError, "Error in scope string: '#{search_scope}' #{arrays.size} is not a valid number of operands" if arrays.size < 1
29
+ end
42
30
  end
43
-
44
- def score_array search_array, reference_array
45
- pending = search_array
46
- current = reference_array.last
47
- reg = Regexp.new( "^#{Regexp.escape( pending.last )}" )
48
- multiplier = START_VALUE
49
- result = 0
50
- while pending.size > 0 && current
51
- if reg =~ current
52
- point_score = (2**POINT_DEPTH) - current.count( '.' ) + Regexp.last_match[0].count( '.' )
53
- result += point_score * multiplier
54
- pending.pop
55
- reg = Regexp.new( "^#{Regexp.escape( pending.last )}" ) if pending.size > 0
56
- end
57
- multiplier = multiplier / BASE
58
- reference_array.pop
59
- current = reference_array.last
60
- end
61
- result = 0 if pending.size > 0
62
- result
31
+ max
32
+ end
33
+
34
+ private
35
+
36
+ def score_term search_scope, reference_scope
37
+ unless @scores[reference_scope] && @scores[reference_scope][search_scope]
38
+ @scores[reference_scope] ||= {}
39
+ @scores[reference_scope][search_scope] = score_array( search_scope.split(' '), reference_scope.split( ' ' ) )
40
+ end
41
+ @scores[reference_scope][search_scope]
42
+ end
43
+
44
+ def score_array search_array, reference_array
45
+ pending = search_array
46
+ current = reference_array.last
47
+ reg = Regexp.new( "^#{Regexp.escape( pending.last )}" )
48
+ multiplier = START_VALUE
49
+ result = 0
50
+ while pending.size > 0 && current
51
+ if reg =~ current
52
+ point_score = (2**POINT_DEPTH) - current.count( '.' ) + Regexp.last_match[0].count( '.' )
53
+ result += point_score * multiplier
54
+ pending.pop
55
+ reg = Regexp.new( "^#{Regexp.escape( pending.last )}" ) if pending.size > 0
56
+ end
57
+ multiplier = multiplier / BASE
58
+ reference_array.pop
59
+ current = reference_array.last
63
60
  end
64
- end
61
+ result = 0 if pending.size > 0
62
+ result
63
+ end
64
+ end
65
65
  end
@@ -1,286 +1,327 @@
1
- require 'plist'
1
+ module Textpow
2
+ RUBY_19 = (RUBY_VERSION > "1.9.0")
3
+ end
4
+ require 'oniguruma' unless Textpow::RUBY_19
2
5
 
3
6
  module Textpow
7
+ # at load time we do not know all patterns / all syntaxes
8
+ # so we store a proxy, that tries to find the correct syntax at runtime
9
+ class SyntaxProxy
10
+ def initialize(included_name, syntax)
11
+ @syntax = syntax
12
+ @included_name = included_name
13
+ end
4
14
 
5
- class SyntaxProxy
6
- def initialize hash, syntax
7
- @syntax = syntax
8
- @proxy = hash["include"]
15
+ def method_missing method, *args, &block
16
+ if @proxy ||= proxy
17
+ @proxy.send(method, *args, &block)
18
+ else
19
+ STDERR.puts "Failed proxying #{@proxy_name}.#{method}(#{args.join(', ')})" if $DEBUG
9
20
  end
10
-
11
- def method_missing method, *args, &block
12
- if @proxy
13
- @proxy_value = proxy unless @proxy_value
14
- if @proxy_value
15
- @proxy_value.send(method, *args, &block)
16
- else
17
- STDERR.puts "Failed proxying #{@proxy}.#{method}(#{args.join(', ')})"
18
- end
19
- end
21
+ end
22
+
23
+ private
24
+
25
+ def proxy
26
+ case @included_name
27
+ when /^#/
28
+ @syntax.repository and @syntax.repository[@included_name[1..-1]]
29
+ when "$self", "$base"
30
+ @syntax
31
+ else
32
+ @syntax.syntaxes[@included_name] || Textpow.syntax(@included_name)
20
33
  end
21
-
22
- def proxy
23
- case @proxy
24
- when /^#/
25
- if @syntax.repository && @syntax.repository[@proxy[1..-1]]
26
- #puts "Repository"
27
- #@table["syntax"].repository.each_key{|k| puts k}
28
- return @syntax.repository[@proxy[1..-1]]
29
- end
30
- when "$self"
31
- return @syntax
32
- when "$base"
33
- return @syntax
34
- else
35
- return @syntax.syntaxes[@proxy]
36
- end
34
+ end
35
+ end
36
+
37
+ class SyntaxNode
38
+ @@syntaxes = {}
39
+
40
+ attr_accessor :syntax
41
+ attr_accessor :firstLineMatch
42
+ attr_accessor :foldingStartMarker
43
+ attr_accessor :foldingStopMarker
44
+ attr_accessor :match
45
+ attr_accessor :begin
46
+ attr_accessor :content
47
+ attr_accessor :fileTypes
48
+ attr_accessor :name
49
+ attr_accessor :contentName
50
+ attr_accessor :end
51
+ attr_accessor :scopeName
52
+ attr_accessor :keyEquivalent
53
+ attr_accessor :captures
54
+ attr_accessor :beginCaptures
55
+ attr_accessor :endCaptures
56
+ attr_accessor :repository
57
+ attr_accessor :patterns
58
+
59
+ def self.load(file, options={})
60
+ table = convert_file_to_table(file)
61
+ SyntaxNode.new(table, options)
62
+ end
63
+
64
+ def initialize(table, options={})
65
+ @syntax = options[:syntax] || self
66
+ @name_space = options[:name_space]
67
+
68
+ register_in_syntaxes(table["scopeName"])
69
+ parse_and_store_syntax_info(table)
70
+ end
71
+
72
+ def syntaxes
73
+ @@syntaxes[@name_space]
74
+ end
75
+
76
+ def parse(string, processor = RecordingProcessor.new)
77
+ processor.start_parsing scopeName
78
+ stack = [[self, nil]]
79
+ string.each_line do |line|
80
+ parse_line stack, line, processor
37
81
  end
38
- end
39
-
40
- class SyntaxNode
41
- OPTIONS = {:options => Oniguruma::OPTION_CAPTURE_GROUP}
42
-
43
- @@syntaxes = {}
44
-
45
- attr_accessor :syntax
46
- attr_accessor :firstLineMatch
47
- attr_accessor :foldingStartMarker
48
- attr_accessor :foldingStopMarker
49
- attr_accessor :match
50
- attr_accessor :begin
51
- attr_accessor :content
52
- attr_accessor :fileTypes
53
- attr_accessor :name
54
- attr_accessor :contentName
55
- attr_accessor :end
56
- attr_accessor :scopeName
57
- attr_accessor :keyEquivalent
58
- attr_accessor :captures
59
- attr_accessor :beginCaptures
60
- attr_accessor :endCaptures
61
- attr_accessor :repository
62
- attr_accessor :patterns
63
-
64
- def self.load filename, name_space = :default
65
- table = nil
66
- case filename
67
- when /(\.tmSyntax|\.plist)$/
68
- table = Plist::parse_xml( filename )
69
- else
70
- File.open( filename ) do |f|
71
- table = YAML.load( f )
72
- end
73
- end
74
- if table
75
- SyntaxNode.new( table, nil, name_space )
76
- else
77
- nil
78
- end
82
+ processor.end_parsing scopeName
83
+
84
+ processor
85
+ end
86
+
87
+ protected
88
+
89
+ def parse_and_store_syntax_info(table)
90
+ table.each do |key, value|
91
+ case key
92
+ when "firstLineMatch", "foldingStartMarker", "foldingStopMarker", "match", "begin"
93
+ instance_variable_set("@#{key}", parse_regex(value))
94
+ when "content", "fileTypes", "name", "contentName", "end", "scopeName", "keyEquivalent"
95
+ instance_variable_set("@#{key}", value)
96
+ when "captures", "beginCaptures", "endCaptures"
97
+ instance_variable_set("@#{key}", value.sort)
98
+ when "repository"
99
+ parse_repository value
100
+ when "patterns"
101
+ create_children value
102
+ when "comment"
103
+ else
104
+ STDERR.puts "Ignoring: #{key} => #{value.gsub("\n", "\n>>")}" if $DEBUG
105
+ end
79
106
  end
80
-
81
- def initialize hash, syntax = nil, name_space = :default
82
- @name_space = name_space
83
- @@syntaxes[@name_space] ||= {}
84
- @@syntaxes[@name_space][hash["scopeName"]] = self if hash["scopeName"]
85
- @syntax = syntax || self
86
- hash.each do |key, value|
87
- case key
88
- when "firstLineMatch", "foldingStartMarker", "foldingStopMarker", "match", "begin"
89
- begin
90
- instance_variable_set( "@#{key}", Oniguruma::ORegexp.new( value, OPTIONS ) )
91
- rescue ArgumentError => e
92
- raise ParsingError, "Parsing error in #{value}: #{e.to_s}"
93
- end
94
- when "content", "fileTypes", "name", "contentName", "end", "scopeName", "keyEquivalent"
95
- instance_variable_set( "@#{key}", value )
96
- when "captures", "beginCaptures", "endCaptures"
97
- instance_variable_set( "@#{key}", value.sort )
98
- when "repository"
99
- parse_repository value
100
- when "patterns"
101
- create_children value
102
- else
103
- STDERR.puts "Ignoring: #{key} => #{value.gsub("\n", "\n>>")}" if $DEBUG
104
- end
105
- end
107
+ end
108
+
109
+ def parse_regex(value)
110
+ if Textpow::RUBY_19
111
+ parse_regex_with_invalid_chars(value)
112
+ else
113
+ Oniguruma::ORegexp.new(value, :options => Oniguruma::OPTION_CAPTURE_GROUP)
106
114
  end
107
-
108
-
109
- def syntaxes
110
- @@syntaxes[@name_space]
115
+ rescue RegexpError, ArgumentError => e
116
+ raise ParsingError, "Parsing error in #{value}: #{e.to_s}"
117
+ end
118
+
119
+ def parse_regex_with_invalid_chars(value)
120
+ Regexp.new(value.force_encoding('UTF-8'))
121
+ rescue RegexpError => e
122
+ if e.message =~ /UTF-8/ or e.message =~ /invalid multibyte escape/
123
+ puts "Ignored utf8 regex error #{$!}"
124
+ /INVALID_UTF8/
125
+ else
126
+ raise e
111
127
  end
112
-
113
- def parse( string, processor = nil )
114
- processor.start_parsing self.scopeName if processor
115
- stack = [[self, nil]]
116
- string.each_line do |line|
117
- parse_line stack, line, processor
118
- end
119
- processor.end_parsing self.scopeName if processor
120
- processor
128
+ end
129
+
130
+ # register in global syntax list -> can be found by include
131
+ def register_in_syntaxes(scope)
132
+ @@syntaxes[@name_space] ||= {}
133
+ @@syntaxes[@name_space][scope] = self if scope
134
+ end
135
+
136
+ def self.convert_file_to_table(file)
137
+ raise "File not found: #{file}" unless File.exist?(file)
138
+ case file
139
+ when /(\.tmSyntax|\.plist)$/
140
+ require 'plist'
141
+ Plist::parse_xml(file)
142
+ else
143
+ YAML.load_file(file)
121
144
  end
122
-
123
- protected
124
-
125
- def parse_repository repository
126
- @repository = {}
127
- repository.each do |key, value|
128
- if value["include"]
129
- @repository[key] = SyntaxProxy.new( value, self.syntax )
130
- else
131
- @repository[key] = SyntaxNode.new( value, self.syntax, @name_space )
132
- end
133
- end
145
+ end
146
+
147
+ def parse_repository(repository)
148
+ @repository = {}
149
+ repository.each do |key, value|
150
+ if value["include"]
151
+ @repository[key] = SyntaxProxy.new(value["include"], syntax)
152
+ else
153
+ @repository[key] = SyntaxNode.new(value, :syntax => syntax, :name_space => @name_space)
154
+ end
134
155
  end
135
-
136
- def create_children patterns
137
- @patterns = []
138
- patterns.each do |p|
139
- if p["include"]
140
- @patterns << SyntaxProxy.new( p, self.syntax )
141
- else
142
- @patterns << SyntaxNode.new( p, self.syntax, @name_space )
143
- end
144
- end
156
+ end
157
+
158
+ def create_children(patterns)
159
+ @patterns = patterns.map do |pattern|
160
+ if pattern["include"]
161
+ SyntaxProxy.new(pattern["include"], syntax)
162
+ else
163
+ SyntaxNode.new(pattern, :syntax => syntax, :name_space => @name_space)
164
+ end
165
+ end
166
+ end
167
+
168
+ def parse_captures name, pattern, match, processor
169
+ captures = pattern.match_captures( name, match )
170
+ captures.reject! { |group, range, name| ! range.first || range.first == range.last }
171
+ starts = []
172
+ ends = []
173
+ captures.each do |group, range, name|
174
+ starts << [range.first, group, name]
175
+ ends << [range.last, -group, name]
145
176
  end
146
177
 
147
- def parse_captures name, pattern, match, processor
148
- captures = pattern.match_captures( name, match )
149
- captures.reject! { |group, range, name| ! range.first || range.first == range.last }
150
- starts = []
151
- ends = []
152
- captures.each do |group, range, name|
153
- starts << [range.first, group, name]
154
- ends << [range.last, -group, name]
155
- end
156
-
157
178
  # STDERR.puts '-' * 100
158
179
  # starts.sort!.reverse!.each{|c| STDERR.puts c.join(', ')}
159
180
  # STDERR.puts
160
181
  # ends.sort!.reverse!.each{|c| STDERR.puts c.join(', ')}
161
- starts.sort!.reverse!
162
- ends.sort!.reverse!
163
-
164
- while ! starts.empty? || ! ends.empty?
165
- if starts.empty?
166
- pos, key, name = ends.pop
167
- processor.close_tag name, pos
168
- elsif ends.empty?
169
- pos, key, name = starts.pop
170
- processor.open_tag name, pos
171
- elsif ends.last[1].abs < starts.last[1]
172
- pos, key, name = ends.pop
173
- processor.close_tag name, pos
174
- else
175
- pos, key, name = starts.pop
176
- processor.open_tag name, pos
177
- end
178
- end
182
+ starts.sort!.reverse!
183
+ ends.sort!.reverse!
184
+
185
+ while ! starts.empty? || ! ends.empty?
186
+ if starts.empty?
187
+ pos, key, name = ends.pop
188
+ processor.close_tag name, pos
189
+ elsif ends.empty?
190
+ pos, key, name = starts.pop
191
+ processor.open_tag name, pos
192
+ elsif ends.last[1].abs < starts.last[1]
193
+ pos, key, name = ends.pop
194
+ processor.close_tag name, pos
195
+ else
196
+ pos, key, name = starts.pop
197
+ processor.open_tag name, pos
198
+ end
179
199
  end
180
-
181
- def match_captures name, match
182
- matches = []
183
- captures = instance_variable_get "@#{name}"
184
- if captures
185
- captures.each do |key, value|
186
- if key =~ /^\d*$/
187
- matches << [key.to_i, match.offset( key.to_i ), value["name"]] if key.to_i < match.size
188
- else
189
- matches << [match.to_index( key.to_sym ), match.offset( key.to_sym), value["name"]] if match.to_index( key.to_sym )
190
- end
191
- end
192
- end
193
- matches
200
+ end
201
+
202
+ def match_captures name, match
203
+ matches = []
204
+ captures = instance_variable_get "@#{name}"
205
+ if captures
206
+ captures.each do |key, value|
207
+ if key =~ /^\d*$/
208
+ matches << [key.to_i, match.offset( key.to_i ), value["name"]] if key.to_i < match.size
209
+ else
210
+ matches << [match.to_index( key.to_sym ), match.offset( key.to_sym), value["name"]] if match.to_index( key.to_sym )
211
+ end
212
+ end
213
+ end
214
+ matches
215
+ end
216
+
217
+ def match_first string, position
218
+ if self.match
219
+ if match = self.match.match( string, position )
220
+ return [self, match]
221
+ end
222
+ elsif self.begin
223
+ if match = self.begin.match( string, position )
224
+ return [self, match]
225
+ end
226
+ elsif self.end
227
+ else
228
+ return match_first_son( string, position )
194
229
  end
195
-
196
- def match_first string, position
197
- if self.match
198
- if match = self.match.match( string, position )
199
- return [self, match]
200
- end
201
- elsif self.begin
202
- if match = self.begin.match( string, position )
203
- return [self, match]
204
- end
205
- elsif self.end
206
- else
207
- return match_first_son( string, position )
208
- end
209
- nil
230
+ nil
231
+ end
232
+
233
+ def match_end string, match, position
234
+ regstring = self.end.clone
235
+ regstring.gsub!( /\\([1-9])/ ) { |s| match[$1.to_i] }
236
+
237
+ # in spox-textpow this is \\g in 1.9 !?
238
+ regstring.gsub!( /\\k<(.*?)>/ ) { |s| match[$1.to_sym] }
239
+ if Textpow::RUBY_19
240
+ Regexp.new( regstring ).match( string, position )
241
+ else
242
+ Oniguruma::ORegexp.new( regstring ).match( string, position )
210
243
  end
211
-
212
- def match_end string, match, position
213
- regstring = self.end.clone
214
- regstring.gsub!( /\\([1-9])/ ) { |s| match[$1.to_i] }
215
- regstring.gsub!( /\\k<(.*?)>/ ) { |s| match[$1.to_sym] }
216
- Oniguruma::ORegexp.new( regstring ).match( string, position )
244
+ end
245
+
246
+ # find earliest matching pattern
247
+ def match_first_son(string, position)
248
+ return if not patterns
249
+
250
+ earliest_match = nil
251
+ earliest_match_offset = nil
252
+ patterns.each do |pattern|
253
+ next unless match = pattern.match_first(string, position)
254
+
255
+ match_offset = match_offset(match[1]).first
256
+ return match if match_offset == 0 # no need to look any further
257
+
258
+ if not earliest_match or earliest_match_offset > match_offset
259
+ earliest_match = match
260
+ earliest_match_offset = match_offset
261
+ end
217
262
  end
218
-
219
- def match_first_son string, position
220
- match = nil
221
- if self.patterns
222
- self.patterns.each do |p|
223
- tmatch = p.match_first string, position
224
- if tmatch
225
- if ! match || match[1].offset.first > tmatch[1].offset.first
226
- match = tmatch
227
- end
228
- #break if tmatch[1].offset.first == position
229
- end
230
- end
231
- end
232
- match
263
+
264
+ earliest_match
265
+ end
266
+
267
+ def parse_line(stack, line, processor)
268
+ processor.new_line line
269
+ top, match = stack.last
270
+ position = 0
271
+ #@ln ||= 0
272
+ #@ln += 1
273
+ #STDERR.puts @ln
274
+ loop do
275
+ if top.patterns
276
+ pattern, pattern_match = top.match_first_son(line, position)
277
+ end
278
+
279
+ if top.end
280
+ end_match = top.match_end( line, match, position )
281
+ end
282
+
283
+ if end_match and (not pattern_match or match_offset(pattern_match).first >= match_offset(end_match).first)
284
+ pattern_match = end_match
285
+ start_pos = match_offset(pattern_match).first
286
+ end_pos = match_offset(pattern_match).last
287
+
288
+ processor.close_tag top.contentName, start_pos if top.contentName
289
+ parse_captures "captures", top, pattern_match, processor
290
+ parse_captures "endCaptures", top, pattern_match, processor
291
+ processor.close_tag top.name, end_pos if top.name
292
+ stack.pop
293
+ top, match = stack.last
294
+ else
295
+ break unless pattern
296
+
297
+ start_pos = match_offset(pattern_match).first
298
+ end_pos = match_offset(pattern_match).last
299
+
300
+ if pattern.begin
301
+ processor.open_tag pattern.name, start_pos if pattern.name
302
+ parse_captures "captures", pattern, pattern_match, processor
303
+ parse_captures "beginCaptures", pattern, pattern_match, processor
304
+ processor.open_tag pattern.contentName, end_pos if pattern.contentName
305
+ top = pattern
306
+ match = pattern_match
307
+ stack << [top, match]
308
+ elsif pattern.match
309
+ processor.open_tag pattern.name, start_pos if pattern.name
310
+ parse_captures "captures", pattern, pattern_match, processor
311
+ processor.close_tag pattern.name, end_pos if pattern.name
312
+ end
313
+ end
314
+
315
+ position = end_pos
233
316
  end
234
-
235
- def parse_line stack, line, processor
236
- processor.new_line line if processor
237
- top, match = stack.last
238
- position = 0
239
- #@ln ||= 0
240
- #@ln += 1
241
- #STDERR.puts @ln
242
- while true
243
- if top.patterns
244
- pattern, pattern_match = top.match_first_son line, position
245
- else
246
- pattern, pattern_match = nil
247
- end
248
-
249
- end_match = nil
250
- if top.end
251
- end_match = top.match_end( line, match, position )
252
- end
253
-
254
- if end_match && ( ! pattern_match || pattern_match.offset.first >= end_match.offset.first )
255
- pattern_match = end_match
256
- start_pos = pattern_match.offset.first
257
- end_pos = pattern_match.offset.last
258
- processor.close_tag top.contentName, start_pos if top.contentName && processor
259
- parse_captures "captures", top, pattern_match, processor if processor
260
- parse_captures "endCaptures", top, pattern_match, processor if processor
261
- processor.close_tag top.name, end_pos if top.name && processor
262
- stack.pop
263
- top, match = stack.last
264
- else
265
- break unless pattern
266
- start_pos = pattern_match.offset.first
267
- end_pos = pattern_match.offset.last
268
- if pattern.begin
269
- processor.open_tag pattern.name, start_pos if pattern.name && processor
270
- parse_captures "captures", pattern, pattern_match, processor if processor
271
- parse_captures "beginCaptures", pattern, pattern_match, processor if processor
272
- processor.open_tag pattern.contentName, end_pos if pattern.contentName && processor
273
- top = pattern
274
- match = pattern_match
275
- stack << [top, match]
276
- elsif pattern.match
277
- processor.open_tag pattern.name, start_pos if pattern.name && processor
278
- parse_captures "captures", pattern, pattern_match, processor if processor
279
- processor.close_tag pattern.name, end_pos if pattern.name && processor
280
- end
281
- end
282
- position = end_pos
283
- end
317
+ end
318
+
319
+ def match_offset(match)
320
+ if Textpow::RUBY_19
321
+ match.offset(0)
322
+ else
323
+ match.offset
284
324
  end
285
- end
325
+ end
326
+ end
286
327
  end