textpow 0.10.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. data/.travis.yml +7 -0
  2. data/Gemfile +10 -0
  3. data/Gemfile.lock +30 -0
  4. data/{History.txt → History.rdoc} +10 -0
  5. data/{README.txt → MIT-LICENSE.txt} +0 -30
  6. data/README.rdoc +82 -0
  7. data/Rakefile +42 -49
  8. data/bin/plist2syntax +0 -0
  9. data/bin/plist2yaml +0 -0
  10. data/examples/benchmark_js.rb +13 -0
  11. data/examples/jquery.js +9046 -0
  12. data/lib/textpow.rb +43 -3
  13. data/lib/textpow/debug_processor.rb +35 -35
  14. data/lib/textpow/recording_processor.rb +13 -0
  15. data/lib/textpow/score_manager.rb +60 -60
  16. data/lib/textpow/syntax.rb +303 -262
  17. data/lib/textpow/syntax/broken/markdown.syntax +519 -0
  18. data/lib/textpow/syntax/broken/php.syntax +1253 -0
  19. data/lib/textpow/syntax/buggy/nemerle.syntax +74 -0
  20. data/lib/textpow/syntax/old/YAML.yaml +160 -0
  21. data/lib/textpow/syntax/old/txt2tags.yaml +139 -0
  22. data/lib/textpow/syntax/source.actionscript.syntax +97 -0
  23. data/lib/textpow/syntax/source.active4d.library.syntax +21 -0
  24. data/lib/textpow/syntax/source.active4d.syntax +276 -0
  25. data/lib/textpow/syntax/source.ada.syntax +33 -0
  26. data/lib/textpow/syntax/source.antlr.syntax +151 -0
  27. data/lib/textpow/syntax/source.apache-config.mod_perl.syntax +50 -0
  28. data/lib/textpow/syntax/source.apache-config.syntax +191 -0
  29. data/lib/textpow/syntax/source.applescript.syntax +384 -0
  30. data/lib/textpow/syntax/source.asp.syntax +70 -0
  31. data/lib/textpow/syntax/source.asp.vb.net.syntax +129 -0
  32. data/lib/textpow/syntax/source.c++.qt.syntax +26 -0
  33. data/lib/textpow/syntax/source.c++.syntax +186 -0
  34. data/lib/textpow/syntax/source.c-sharp.syntax +59 -0
  35. data/lib/textpow/syntax/source.c.ragel.syntax +201 -0
  36. data/lib/textpow/syntax/source.c.syntax +414 -0
  37. data/lib/textpow/syntax/source.camlp4.ocaml.syntax +36 -0
  38. data/lib/textpow/syntax/source.cm.syntax +32 -0
  39. data/lib/textpow/syntax/source.coffee.syntax +216 -0
  40. data/lib/textpow/syntax/source.context-free.syntax +176 -0
  41. data/lib/textpow/syntax/source.css.beta.syntax +1925 -0
  42. data/lib/textpow/syntax/source.css.syntax +195 -0
  43. data/lib/textpow/syntax/source.d.syntax +142 -0
  44. data/lib/textpow/syntax/source.diff.syntax +81 -0
  45. data/lib/textpow/syntax/source.dot.syntax +47 -0
  46. data/lib/textpow/syntax/source.dylan.syntax +62 -0
  47. data/lib/textpow/syntax/source.eiffel.syntax +78 -0
  48. data/lib/textpow/syntax/source.erlang.syntax +922 -0
  49. data/lib/textpow/syntax/source.fortran.syntax +141 -0
  50. data/lib/textpow/syntax/source.fscript.syntax +80 -0
  51. data/lib/textpow/syntax/source.fxscript.syntax +142 -0
  52. data/lib/textpow/syntax/source.gri.syntax +83 -0
  53. data/lib/textpow/syntax/source.groovy.groovy.syntax +191 -0
  54. data/lib/textpow/syntax/source.haskell.syntax +88 -0
  55. data/lib/textpow/syntax/source.icalendar.syntax +32 -0
  56. data/lib/textpow/syntax/source.inform.syntax +48 -0
  57. data/lib/textpow/syntax/source.ini.syntax +55 -0
  58. data/lib/textpow/syntax/source.io.syntax +81 -0
  59. data/lib/textpow/syntax/source.java-props.syntax +20 -0
  60. data/lib/textpow/syntax/source.java.syntax +211 -0
  61. data/lib/textpow/syntax/source.js.greasemonkey.syntax +34 -0
  62. data/lib/textpow/syntax/source.js.jquery.syntax +114 -0
  63. data/lib/textpow/syntax/source.js.mootools.syntax +572 -0
  64. data/lib/textpow/syntax/source.js.prototype.bracketed.syntax +140 -0
  65. data/lib/textpow/syntax/source.js.prototype.syntax +72 -0
  66. data/lib/textpow/syntax/source.js.syntax +256 -0
  67. data/lib/textpow/syntax/source.js.yui.syntax +176 -0
  68. data/lib/textpow/syntax/source.json.syntax +136 -0
  69. data/lib/textpow/syntax/source.lex.syntax +219 -0
  70. data/lib/textpow/syntax/source.lighttpd-config.syntax +54 -0
  71. data/lib/textpow/syntax/source.lilypond.syntax +492 -0
  72. data/lib/textpow/syntax/source.lisp.syntax +61 -0
  73. data/lib/textpow/syntax/source.logo.syntax +29 -0
  74. data/lib/textpow/syntax/source.logtalk.syntax +152 -0
  75. data/lib/textpow/syntax/source.lua.syntax +86 -0
  76. data/lib/textpow/syntax/source.makefile.syntax +36 -0
  77. data/lib/textpow/syntax/source.matlab.syntax +142 -0
  78. data/lib/textpow/syntax/source.mel.syntax +92 -0
  79. data/lib/textpow/syntax/source.mips.syntax +66 -0
  80. data/lib/textpow/syntax/source.ml.syntax +121 -0
  81. data/lib/textpow/syntax/source.modula-3.syntax +47 -0
  82. data/lib/textpow/syntax/source.nant-build.syntax +53 -0
  83. data/lib/textpow/syntax/source.objc++.syntax +18 -0
  84. data/lib/textpow/syntax/source.objc.syntax +233 -0
  85. data/lib/textpow/syntax/source.ocaml.syntax +764 -0
  86. data/lib/textpow/syntax/source.ocamllex.syntax +167 -0
  87. data/lib/textpow/syntax/source.ocamlyacc.syntax +184 -0
  88. data/lib/textpow/syntax/source.open-gl.syntax +14 -0
  89. data/lib/textpow/syntax/source.pascal.syntax +77 -0
  90. data/lib/textpow/syntax/source.pascal.vectorscript.syntax +57 -0
  91. data/lib/textpow/syntax/source.perl.syntax +1113 -0
  92. data/lib/textpow/syntax/source.php.cake.syntax +55 -0
  93. data/lib/textpow/syntax/source.plist.tm-grammar.syntax +708 -0
  94. data/lib/textpow/syntax/source.postscript.syntax +114 -0
  95. data/lib/textpow/syntax/source.processing.syntax +106 -0
  96. data/lib/textpow/syntax/source.prolog.syntax +40 -0
  97. data/lib/textpow/syntax/source.python.django.syntax +21 -0
  98. data/lib/textpow/syntax/source.python.syntax +868 -0
  99. data/lib/textpow/syntax/source.qmake.syntax +114 -0
  100. data/lib/textpow/syntax/source.quake-config.syntax +32 -0
  101. data/lib/textpow/syntax/source.r-console.syntax +16 -0
  102. data/lib/textpow/syntax/source.r.syntax +81 -0
  103. data/lib/textpow/syntax/source.regexp.oniguruma.syntax +107 -0
  104. data/lib/textpow/syntax/source.regexp.python.syntax +109 -0
  105. data/lib/textpow/syntax/source.regexp.syntax +50 -0
  106. data/lib/textpow/syntax/source.remind.syntax +253 -0
  107. data/lib/textpow/syntax/source.rez.syntax +80 -0
  108. data/lib/textpow/syntax/source.ruby.experimental.syntax +145 -0
  109. data/lib/textpow/syntax/source.ruby.rails.syntax +88 -0
  110. data/lib/textpow/syntax/source.ruby.syntax +1035 -0
  111. data/lib/textpow/syntax/source.s5.syntax +69 -0
  112. data/lib/textpow/syntax/source.sass.syntax +45 -0
  113. data/lib/textpow/syntax/source.scheme.syntax +347 -0
  114. data/lib/textpow/syntax/source.scilab.syntax +41 -0
  115. data/lib/textpow/syntax/source.scss.syntax +527 -0
  116. data/lib/textpow/syntax/source.shell.syntax +384 -0
  117. data/lib/textpow/syntax/source.slate.syntax +149 -0
  118. data/lib/textpow/syntax/source.smarty.syntax +63 -0
  119. data/lib/textpow/syntax/source.sql.ruby.syntax +18 -0
  120. data/lib/textpow/syntax/source.sql.syntax +237 -0
  121. data/lib/textpow/syntax/source.ssh-config.syntax +33 -0
  122. data/lib/textpow/syntax/source.strings.syntax +39 -0
  123. data/lib/textpow/syntax/source.swig.syntax +57 -0
  124. data/lib/textpow/syntax/source.tcl.macports.syntax +163 -0
  125. data/lib/textpow/syntax/source.tcl.syntax +152 -0
  126. data/lib/textpow/syntax/source.yaml.syntax +160 -0
  127. data/lib/textpow/syntax/text.active4d-ini.syntax +50 -0
  128. data/lib/textpow/syntax/text.bbcode.syntax +287 -0
  129. data/lib/textpow/syntax/text.bibtex.syntax +151 -0
  130. data/lib/textpow/syntax/text.blog.html.syntax +41 -0
  131. data/lib/textpow/syntax/text.blog.markdown.syntax +42 -0
  132. data/lib/textpow/syntax/text.blog.syntax +27 -0
  133. data/lib/textpow/syntax/text.blog.textile.syntax +27 -0
  134. data/lib/textpow/syntax/text.gtdalt.syntax +143 -0
  135. data/lib/textpow/syntax/text.haml.syntax +88 -0
  136. data/lib/textpow/syntax/text.html.asp.net.syntax +424 -0
  137. data/lib/textpow/syntax/text.html.asp.syntax +27 -0
  138. data/lib/textpow/syntax/text.html.basic.syntax +362 -0
  139. data/lib/textpow/syntax/text.html.cfm.syntax +119 -0
  140. data/lib/textpow/syntax/text.html.django.syntax +36 -0
  141. data/lib/textpow/syntax/text.html.dokuwiki.syntax +204 -0
  142. data/lib/textpow/syntax/text.html.doxygen.syntax +43 -0
  143. data/lib/textpow/syntax/text.html.markdown.multimarkdown.syntax +39 -0
  144. data/lib/textpow/syntax/text.html.mason.syntax +119 -0
  145. data/lib/textpow/syntax/text.html.mediawiki.syntax +567 -0
  146. data/lib/textpow/syntax/text.html.mt.syntax +162 -0
  147. data/lib/textpow/syntax/text.html.ruby.syntax +40 -0
  148. data/lib/textpow/syntax/text.html.strict.active4d.syntax +311 -0
  149. data/lib/textpow/syntax/text.html.tcl.syntax +26 -0
  150. data/lib/textpow/syntax/text.html.textile.syntax +215 -0
  151. data/lib/textpow/syntax/text.html.tt.syntax +121 -0
  152. data/lib/textpow/syntax/text.html.twiki.syntax +241 -0
  153. data/lib/textpow/syntax/text.html.xhtml.1-strict.syntax +4027 -0
  154. data/lib/textpow/syntax/text.log.latex.syntax +50 -0
  155. data/lib/textpow/syntax/text.mail.markdown.syntax +118 -0
  156. data/lib/textpow/syntax/text.man.syntax +17 -0
  157. data/lib/textpow/syntax/text.moinmoin.syntax +189 -0
  158. data/lib/textpow/syntax/text.plain.gtd.syntax +22 -0
  159. data/lib/textpow/syntax/text.plain.release-notes.syntax +46 -0
  160. data/lib/textpow/syntax/text.plain.syntax +32 -0
  161. data/lib/textpow/syntax/text.plist.syntax +635 -0
  162. data/lib/textpow/syntax/text.pmwiki.syntax +113 -0
  163. data/lib/textpow/syntax/text.restructuredtext.syntax +250 -0
  164. data/lib/textpow/syntax/text.setext.syntax +147 -0
  165. data/lib/textpow/syntax/text.subversion-commit.syntax +36 -0
  166. data/lib/textpow/syntax/text.tabular.csv.syntax +68 -0
  167. data/lib/textpow/syntax/text.tabular.tsv.syntax +50 -0
  168. data/lib/textpow/syntax/text.tex.latex.beamer.syntax +41 -0
  169. data/lib/textpow/syntax/text.tex.latex.haskell.syntax +24 -0
  170. data/lib/textpow/syntax/text.tex.latex.memoir.syntax +64 -0
  171. data/lib/textpow/syntax/text.tex.latex.rd.syntax +91 -0
  172. data/lib/textpow/syntax/text.tex.latex.sweave.syntax +84 -0
  173. data/lib/textpow/syntax/text.tex.latex.syntax +566 -0
  174. data/lib/textpow/syntax/text.tex.math.syntax +49 -0
  175. data/lib/textpow/syntax/text.tex.syntax +86 -0
  176. data/lib/textpow/syntax/text.txt2tags.syntax +79 -0
  177. data/lib/textpow/syntax/text.xml.apple-dist.syntax +77 -0
  178. data/lib/textpow/syntax/text.xml.strict.syntax +92 -0
  179. data/lib/textpow/syntax/text.xml.syntax +180 -0
  180. data/lib/textpow/syntax/text.xml.xsl.syntax +60 -0
  181. data/lib/textpow/version.rb +3 -0
  182. data/spec/fixtures/objeck.plist +107 -0
  183. data/spec/fixtures/utf8.txt +1 -0
  184. data/spec/spec_helper.rb +2 -0
  185. data/spec/textpow/score_manager_spec.rb +20 -0
  186. data/spec/textpow/syntax_files_spec.rb +26 -0
  187. data/spec/textpow/syntax_spec.rb +225 -0
  188. data/spec/textpow_spec.rb +57 -0
  189. data/textpow.gemspec +19 -0
  190. metadata +246 -68
  191. data/Manifest.txt +0 -13
  192. data/mm/manual.mm +0 -266
  193. data/test/test_textpow.rb +0 -25
@@ -1,10 +1,50 @@
1
1
  require 'yaml'
2
- require 'oniguruma'
3
2
  require 'textpow/syntax'
4
3
  require 'textpow/debug_processor'
4
+ require 'textpow/recording_processor'
5
5
  require 'textpow/score_manager'
6
-
6
+ require 'textpow/version'
7
7
 
8
8
  module Textpow
9
- class ParsingError < Exception; end
9
+ class ParsingError < Exception; end
10
+
11
+ def self.syntax_path
12
+ File.join(File.dirname(__FILE__), 'textpow', 'syntax')
13
+ end
14
+
15
+ @@syntax = {}
16
+ def self.syntax(syntax_name)
17
+ syntax_name = syntax_name.downcase
18
+ if @@syntax.has_key?(syntax_name)
19
+ @@syntax[syntax_name]
20
+ else
21
+ @@syntax[syntax_name] = uncached_syntax(syntax_name)
22
+ end
23
+ end
24
+
25
+ private
26
+
27
+ def self.uncached_syntax(name)
28
+ path = (
29
+ find_syntax_by_path(name) ||
30
+ find_syntax_by_scope_name(name) ||
31
+ find_syntax_by_fuzzy_name(name)
32
+ )
33
+ SyntaxNode.load(path) if path
34
+ end
35
+
36
+ def self.find_syntax_by_scope_name(name)
37
+ path = File.join(syntax_path, "#{name}.syntax")
38
+ path if File.exist?(path)
39
+ end
40
+
41
+ def self.find_syntax_by_fuzzy_name(name)
42
+ path = Dir.glob(File.join(syntax_path, "*.#{name}.*")).sort_by(&:size).first
43
+ path if path and File.exist?(path)
44
+ end
45
+
46
+ def self.find_syntax_by_path(path)
47
+ path if File.file?(path)
48
+ end
10
49
  end
50
+
@@ -1,36 +1,36 @@
1
- module Textpow
2
- class DebugProcessor
3
- def initialize
4
- @line_number = 0
5
- @printable_line = ""
6
- end
7
-
8
- def pprint line, string, position = 0
9
- line.replace line.ljust( position + string.size, " ")
10
- line[position,string.size] = string
11
- line
12
- end
13
-
14
- def open_tag name, position
15
- STDERR.puts pprint( "", "{#{name}", position + @line_marks.size)
16
- end
17
-
18
- def close_tag name, position
19
- STDERR.puts pprint( "", "}#{name}", position + @line_marks.size)
20
- end
21
-
22
- def new_line line
23
- @line_number += 1
24
- @line_marks = "[#{@line_number.to_s.rjust( 4, '0' )}] "
25
- STDERR.puts "#{@line_marks}#{line}"
26
- end
27
-
28
- def start_parsing name
29
- STDERR.puts "{#{name}"
30
- end
31
-
32
- def end_parsing name
33
- STDERR.puts "}#{name}"
34
- end
35
- end
1
+ module Textpow
2
+ class DebugProcessor
3
+ def initialize
4
+ @line_number = 0
5
+ @printable_line = ""
6
+ end
7
+
8
+ def pprint line, string, position = 0
9
+ line.replace line.ljust( position + string.size, " ")
10
+ line[position,string.size] = string
11
+ line
12
+ end
13
+
14
+ def open_tag name, position
15
+ STDERR.puts pprint( "", "{#{name}", position + @line_marks.size)
16
+ end
17
+
18
+ def close_tag name, position
19
+ STDERR.puts pprint( "", "}#{name}", position + @line_marks.size)
20
+ end
21
+
22
+ def new_line line
23
+ @line_number += 1
24
+ @line_marks = "[#{@line_number.to_s.rjust( 4, '0' )}] "
25
+ STDERR.puts "#{@line_marks}#{line}"
26
+ end
27
+
28
+ def start_parsing name
29
+ STDERR.puts "{#{name}"
30
+ end
31
+
32
+ def end_parsing name
33
+ STDERR.puts "}#{name}"
34
+ end
35
+ end
36
36
  end
@@ -0,0 +1,13 @@
1
+ module Textpow
2
+ class RecordingProcessor
3
+ attr_accessor :stack
4
+
5
+ def initialize
6
+ @stack = []
7
+ end
8
+
9
+ def method_missing(name, *args)
10
+ @stack << [name, *args]
11
+ end
12
+ end
13
+ end
@@ -1,65 +1,65 @@
1
1
  module Textpow
2
- class ScoreManager
3
- POINT_DEPTH = 4
4
- NESTING_DEPTH = 40
5
- START_VALUE = 2 ** ( POINT_DEPTH * NESTING_DEPTH )
6
- BASE = 2 ** POINT_DEPTH
7
-
8
- def initialize
9
- @scores = {}
10
- end
11
-
12
- def score search_scope, reference_scope
13
- max = 0
14
- search_scope.split( ',' ).each do |scope|
15
- arrays = scope.split(/\B-/)
16
- if arrays.size == 1
17
- max = [max, score_term( arrays[0], reference_scope )].max
18
- elsif arrays.size > 1
19
- excluded = false
20
- arrays[1..-1].each do |a|
21
- if score_term( arrays[1], reference_scope ) > 0
22
- excluded = true
23
- break
24
- end
25
- end
26
- max = [max, score_term( arrays[0], reference_scope )].max unless excluded
27
- else
28
- raise ParsingError, "Error in scope string: '#{search_scope}' #{arrays.size} is not a valid number of operands" if arrays.size < 1
2
+ class ScoreManager
3
+ POINT_DEPTH = 4
4
+ NESTING_DEPTH = 40
5
+ START_VALUE = 2 ** ( POINT_DEPTH * NESTING_DEPTH )
6
+ BASE = 2 ** POINT_DEPTH
7
+
8
+ def initialize
9
+ @scores = {}
10
+ end
11
+
12
+ def score search_scope, reference_scope
13
+ max = 0
14
+ search_scope.split( ',' ).each do |scope|
15
+ arrays = scope.split(/\B-/)
16
+ if arrays.size == 1
17
+ max = [max, score_term( arrays[0], reference_scope )].max
18
+ elsif arrays.size > 1
19
+ excluded = false
20
+ arrays[1..-1].each do |a|
21
+ if score_term( arrays[1], reference_scope ) > 0
22
+ excluded = true
23
+ break
29
24
  end
30
- end
31
- max
32
- end
33
-
34
- private
35
-
36
- def score_term search_scope, reference_scope
37
- unless @scores[reference_scope] && @scores[reference_scope][search_scope]
38
- @scores[reference_scope] ||= {}
39
- @scores[reference_scope][search_scope] = score_array( search_scope.split(' '), reference_scope.split( ' ' ) )
40
- end
41
- @scores[reference_scope][search_scope]
25
+ end
26
+ max = [max, score_term( arrays[0], reference_scope )].max unless excluded
27
+ else
28
+ raise ParsingError, "Error in scope string: '#{search_scope}' #{arrays.size} is not a valid number of operands" if arrays.size < 1
29
+ end
42
30
  end
43
-
44
- def score_array search_array, reference_array
45
- pending = search_array
46
- current = reference_array.last
47
- reg = Regexp.new( "^#{Regexp.escape( pending.last )}" )
48
- multiplier = START_VALUE
49
- result = 0
50
- while pending.size > 0 && current
51
- if reg =~ current
52
- point_score = (2**POINT_DEPTH) - current.count( '.' ) + Regexp.last_match[0].count( '.' )
53
- result += point_score * multiplier
54
- pending.pop
55
- reg = Regexp.new( "^#{Regexp.escape( pending.last )}" ) if pending.size > 0
56
- end
57
- multiplier = multiplier / BASE
58
- reference_array.pop
59
- current = reference_array.last
60
- end
61
- result = 0 if pending.size > 0
62
- result
31
+ max
32
+ end
33
+
34
+ private
35
+
36
+ def score_term search_scope, reference_scope
37
+ unless @scores[reference_scope] && @scores[reference_scope][search_scope]
38
+ @scores[reference_scope] ||= {}
39
+ @scores[reference_scope][search_scope] = score_array( search_scope.split(' '), reference_scope.split( ' ' ) )
40
+ end
41
+ @scores[reference_scope][search_scope]
42
+ end
43
+
44
+ def score_array search_array, reference_array
45
+ pending = search_array
46
+ current = reference_array.last
47
+ reg = Regexp.new( "^#{Regexp.escape( pending.last )}" )
48
+ multiplier = START_VALUE
49
+ result = 0
50
+ while pending.size > 0 && current
51
+ if reg =~ current
52
+ point_score = (2**POINT_DEPTH) - current.count( '.' ) + Regexp.last_match[0].count( '.' )
53
+ result += point_score * multiplier
54
+ pending.pop
55
+ reg = Regexp.new( "^#{Regexp.escape( pending.last )}" ) if pending.size > 0
56
+ end
57
+ multiplier = multiplier / BASE
58
+ reference_array.pop
59
+ current = reference_array.last
63
60
  end
64
- end
61
+ result = 0 if pending.size > 0
62
+ result
63
+ end
64
+ end
65
65
  end
@@ -1,286 +1,327 @@
1
- require 'plist'
1
+ module Textpow
2
+ RUBY_19 = (RUBY_VERSION > "1.9.0")
3
+ end
4
+ require 'oniguruma' unless Textpow::RUBY_19
2
5
 
3
6
  module Textpow
7
+ # at load time we do not know all patterns / all syntaxes
8
+ # so we store a proxy, that tries to find the correct syntax at runtime
9
+ class SyntaxProxy
10
+ def initialize(included_name, syntax)
11
+ @syntax = syntax
12
+ @included_name = included_name
13
+ end
4
14
 
5
- class SyntaxProxy
6
- def initialize hash, syntax
7
- @syntax = syntax
8
- @proxy = hash["include"]
15
+ def method_missing method, *args, &block
16
+ if @proxy ||= proxy
17
+ @proxy.send(method, *args, &block)
18
+ else
19
+ STDERR.puts "Failed proxying #{@proxy_name}.#{method}(#{args.join(', ')})" if $DEBUG
9
20
  end
10
-
11
- def method_missing method, *args, &block
12
- if @proxy
13
- @proxy_value = proxy unless @proxy_value
14
- if @proxy_value
15
- @proxy_value.send(method, *args, &block)
16
- else
17
- STDERR.puts "Failed proxying #{@proxy}.#{method}(#{args.join(', ')})"
18
- end
19
- end
21
+ end
22
+
23
+ private
24
+
25
+ def proxy
26
+ case @included_name
27
+ when /^#/
28
+ @syntax.repository and @syntax.repository[@included_name[1..-1]]
29
+ when "$self", "$base"
30
+ @syntax
31
+ else
32
+ @syntax.syntaxes[@included_name] || Textpow.syntax(@included_name)
20
33
  end
21
-
22
- def proxy
23
- case @proxy
24
- when /^#/
25
- if @syntax.repository && @syntax.repository[@proxy[1..-1]]
26
- #puts "Repository"
27
- #@table["syntax"].repository.each_key{|k| puts k}
28
- return @syntax.repository[@proxy[1..-1]]
29
- end
30
- when "$self"
31
- return @syntax
32
- when "$base"
33
- return @syntax
34
- else
35
- return @syntax.syntaxes[@proxy]
36
- end
34
+ end
35
+ end
36
+
37
+ class SyntaxNode
38
+ @@syntaxes = {}
39
+
40
+ attr_accessor :syntax
41
+ attr_accessor :firstLineMatch
42
+ attr_accessor :foldingStartMarker
43
+ attr_accessor :foldingStopMarker
44
+ attr_accessor :match
45
+ attr_accessor :begin
46
+ attr_accessor :content
47
+ attr_accessor :fileTypes
48
+ attr_accessor :name
49
+ attr_accessor :contentName
50
+ attr_accessor :end
51
+ attr_accessor :scopeName
52
+ attr_accessor :keyEquivalent
53
+ attr_accessor :captures
54
+ attr_accessor :beginCaptures
55
+ attr_accessor :endCaptures
56
+ attr_accessor :repository
57
+ attr_accessor :patterns
58
+
59
+ def self.load(file, options={})
60
+ table = convert_file_to_table(file)
61
+ SyntaxNode.new(table, options)
62
+ end
63
+
64
+ def initialize(table, options={})
65
+ @syntax = options[:syntax] || self
66
+ @name_space = options[:name_space]
67
+
68
+ register_in_syntaxes(table["scopeName"])
69
+ parse_and_store_syntax_info(table)
70
+ end
71
+
72
+ def syntaxes
73
+ @@syntaxes[@name_space]
74
+ end
75
+
76
+ def parse(string, processor = RecordingProcessor.new)
77
+ processor.start_parsing scopeName
78
+ stack = [[self, nil]]
79
+ string.each_line do |line|
80
+ parse_line stack, line, processor
37
81
  end
38
- end
39
-
40
- class SyntaxNode
41
- OPTIONS = {:options => Oniguruma::OPTION_CAPTURE_GROUP}
42
-
43
- @@syntaxes = {}
44
-
45
- attr_accessor :syntax
46
- attr_accessor :firstLineMatch
47
- attr_accessor :foldingStartMarker
48
- attr_accessor :foldingStopMarker
49
- attr_accessor :match
50
- attr_accessor :begin
51
- attr_accessor :content
52
- attr_accessor :fileTypes
53
- attr_accessor :name
54
- attr_accessor :contentName
55
- attr_accessor :end
56
- attr_accessor :scopeName
57
- attr_accessor :keyEquivalent
58
- attr_accessor :captures
59
- attr_accessor :beginCaptures
60
- attr_accessor :endCaptures
61
- attr_accessor :repository
62
- attr_accessor :patterns
63
-
64
- def self.load filename, name_space = :default
65
- table = nil
66
- case filename
67
- when /(\.tmSyntax|\.plist)$/
68
- table = Plist::parse_xml( filename )
69
- else
70
- File.open( filename ) do |f|
71
- table = YAML.load( f )
72
- end
73
- end
74
- if table
75
- SyntaxNode.new( table, nil, name_space )
76
- else
77
- nil
78
- end
82
+ processor.end_parsing scopeName
83
+
84
+ processor
85
+ end
86
+
87
+ protected
88
+
89
+ def parse_and_store_syntax_info(table)
90
+ table.each do |key, value|
91
+ case key
92
+ when "firstLineMatch", "foldingStartMarker", "foldingStopMarker", "match", "begin"
93
+ instance_variable_set("@#{key}", parse_regex(value))
94
+ when "content", "fileTypes", "name", "contentName", "end", "scopeName", "keyEquivalent"
95
+ instance_variable_set("@#{key}", value)
96
+ when "captures", "beginCaptures", "endCaptures"
97
+ instance_variable_set("@#{key}", value.sort)
98
+ when "repository"
99
+ parse_repository value
100
+ when "patterns"
101
+ create_children value
102
+ when "comment"
103
+ else
104
+ STDERR.puts "Ignoring: #{key} => #{value.gsub("\n", "\n>>")}" if $DEBUG
105
+ end
79
106
  end
80
-
81
- def initialize hash, syntax = nil, name_space = :default
82
- @name_space = name_space
83
- @@syntaxes[@name_space] ||= {}
84
- @@syntaxes[@name_space][hash["scopeName"]] = self if hash["scopeName"]
85
- @syntax = syntax || self
86
- hash.each do |key, value|
87
- case key
88
- when "firstLineMatch", "foldingStartMarker", "foldingStopMarker", "match", "begin"
89
- begin
90
- instance_variable_set( "@#{key}", Oniguruma::ORegexp.new( value, OPTIONS ) )
91
- rescue ArgumentError => e
92
- raise ParsingError, "Parsing error in #{value}: #{e.to_s}"
93
- end
94
- when "content", "fileTypes", "name", "contentName", "end", "scopeName", "keyEquivalent"
95
- instance_variable_set( "@#{key}", value )
96
- when "captures", "beginCaptures", "endCaptures"
97
- instance_variable_set( "@#{key}", value.sort )
98
- when "repository"
99
- parse_repository value
100
- when "patterns"
101
- create_children value
102
- else
103
- STDERR.puts "Ignoring: #{key} => #{value.gsub("\n", "\n>>")}" if $DEBUG
104
- end
105
- end
107
+ end
108
+
109
+ def parse_regex(value)
110
+ if Textpow::RUBY_19
111
+ parse_regex_with_invalid_chars(value)
112
+ else
113
+ Oniguruma::ORegexp.new(value, :options => Oniguruma::OPTION_CAPTURE_GROUP)
106
114
  end
107
-
108
-
109
- def syntaxes
110
- @@syntaxes[@name_space]
115
+ rescue RegexpError, ArgumentError => e
116
+ raise ParsingError, "Parsing error in #{value}: #{e.to_s}"
117
+ end
118
+
119
+ def parse_regex_with_invalid_chars(value)
120
+ Regexp.new(value.force_encoding('UTF-8'))
121
+ rescue RegexpError => e
122
+ if e.message =~ /UTF-8/ or e.message =~ /invalid multibyte escape/
123
+ puts "Ignored utf8 regex error #{$!}"
124
+ /INVALID_UTF8/
125
+ else
126
+ raise e
111
127
  end
112
-
113
- def parse( string, processor = nil )
114
- processor.start_parsing self.scopeName if processor
115
- stack = [[self, nil]]
116
- string.each_line do |line|
117
- parse_line stack, line, processor
118
- end
119
- processor.end_parsing self.scopeName if processor
120
- processor
128
+ end
129
+
130
+ # register in global syntax list -> can be found by include
131
+ def register_in_syntaxes(scope)
132
+ @@syntaxes[@name_space] ||= {}
133
+ @@syntaxes[@name_space][scope] = self if scope
134
+ end
135
+
136
+ def self.convert_file_to_table(file)
137
+ raise "File not found: #{file}" unless File.exist?(file)
138
+ case file
139
+ when /(\.tmSyntax|\.plist)$/
140
+ require 'plist'
141
+ Plist::parse_xml(file)
142
+ else
143
+ YAML.load_file(file)
121
144
  end
122
-
123
- protected
124
-
125
- def parse_repository repository
126
- @repository = {}
127
- repository.each do |key, value|
128
- if value["include"]
129
- @repository[key] = SyntaxProxy.new( value, self.syntax )
130
- else
131
- @repository[key] = SyntaxNode.new( value, self.syntax, @name_space )
132
- end
133
- end
145
+ end
146
+
147
+ def parse_repository(repository)
148
+ @repository = {}
149
+ repository.each do |key, value|
150
+ if value["include"]
151
+ @repository[key] = SyntaxProxy.new(value["include"], syntax)
152
+ else
153
+ @repository[key] = SyntaxNode.new(value, :syntax => syntax, :name_space => @name_space)
154
+ end
134
155
  end
135
-
136
- def create_children patterns
137
- @patterns = []
138
- patterns.each do |p|
139
- if p["include"]
140
- @patterns << SyntaxProxy.new( p, self.syntax )
141
- else
142
- @patterns << SyntaxNode.new( p, self.syntax, @name_space )
143
- end
144
- end
156
+ end
157
+
158
+ def create_children(patterns)
159
+ @patterns = patterns.map do |pattern|
160
+ if pattern["include"]
161
+ SyntaxProxy.new(pattern["include"], syntax)
162
+ else
163
+ SyntaxNode.new(pattern, :syntax => syntax, :name_space => @name_space)
164
+ end
165
+ end
166
+ end
167
+
168
+ def parse_captures name, pattern, match, processor
169
+ captures = pattern.match_captures( name, match )
170
+ captures.reject! { |group, range, name| ! range.first || range.first == range.last }
171
+ starts = []
172
+ ends = []
173
+ captures.each do |group, range, name|
174
+ starts << [range.first, group, name]
175
+ ends << [range.last, -group, name]
145
176
  end
146
177
 
147
- def parse_captures name, pattern, match, processor
148
- captures = pattern.match_captures( name, match )
149
- captures.reject! { |group, range, name| ! range.first || range.first == range.last }
150
- starts = []
151
- ends = []
152
- captures.each do |group, range, name|
153
- starts << [range.first, group, name]
154
- ends << [range.last, -group, name]
155
- end
156
-
157
178
  # STDERR.puts '-' * 100
158
179
  # starts.sort!.reverse!.each{|c| STDERR.puts c.join(', ')}
159
180
  # STDERR.puts
160
181
  # ends.sort!.reverse!.each{|c| STDERR.puts c.join(', ')}
161
- starts.sort!.reverse!
162
- ends.sort!.reverse!
163
-
164
- while ! starts.empty? || ! ends.empty?
165
- if starts.empty?
166
- pos, key, name = ends.pop
167
- processor.close_tag name, pos
168
- elsif ends.empty?
169
- pos, key, name = starts.pop
170
- processor.open_tag name, pos
171
- elsif ends.last[1].abs < starts.last[1]
172
- pos, key, name = ends.pop
173
- processor.close_tag name, pos
174
- else
175
- pos, key, name = starts.pop
176
- processor.open_tag name, pos
177
- end
178
- end
182
+ starts.sort!.reverse!
183
+ ends.sort!.reverse!
184
+
185
+ while ! starts.empty? || ! ends.empty?
186
+ if starts.empty?
187
+ pos, key, name = ends.pop
188
+ processor.close_tag name, pos
189
+ elsif ends.empty?
190
+ pos, key, name = starts.pop
191
+ processor.open_tag name, pos
192
+ elsif ends.last[1].abs < starts.last[1]
193
+ pos, key, name = ends.pop
194
+ processor.close_tag name, pos
195
+ else
196
+ pos, key, name = starts.pop
197
+ processor.open_tag name, pos
198
+ end
179
199
  end
180
-
181
- def match_captures name, match
182
- matches = []
183
- captures = instance_variable_get "@#{name}"
184
- if captures
185
- captures.each do |key, value|
186
- if key =~ /^\d*$/
187
- matches << [key.to_i, match.offset( key.to_i ), value["name"]] if key.to_i < match.size
188
- else
189
- matches << [match.to_index( key.to_sym ), match.offset( key.to_sym), value["name"]] if match.to_index( key.to_sym )
190
- end
191
- end
192
- end
193
- matches
200
+ end
201
+
202
+ def match_captures name, match
203
+ matches = []
204
+ captures = instance_variable_get "@#{name}"
205
+ if captures
206
+ captures.each do |key, value|
207
+ if key =~ /^\d*$/
208
+ matches << [key.to_i, match.offset( key.to_i ), value["name"]] if key.to_i < match.size
209
+ else
210
+ matches << [match.to_index( key.to_sym ), match.offset( key.to_sym), value["name"]] if match.to_index( key.to_sym )
211
+ end
212
+ end
213
+ end
214
+ matches
215
+ end
216
+
217
+ def match_first string, position
218
+ if self.match
219
+ if match = self.match.match( string, position )
220
+ return [self, match]
221
+ end
222
+ elsif self.begin
223
+ if match = self.begin.match( string, position )
224
+ return [self, match]
225
+ end
226
+ elsif self.end
227
+ else
228
+ return match_first_son( string, position )
194
229
  end
195
-
196
- def match_first string, position
197
- if self.match
198
- if match = self.match.match( string, position )
199
- return [self, match]
200
- end
201
- elsif self.begin
202
- if match = self.begin.match( string, position )
203
- return [self, match]
204
- end
205
- elsif self.end
206
- else
207
- return match_first_son( string, position )
208
- end
209
- nil
230
+ nil
231
+ end
232
+
233
+ def match_end string, match, position
234
+ regstring = self.end.clone
235
+ regstring.gsub!( /\\([1-9])/ ) { |s| match[$1.to_i] }
236
+
237
+ # in spox-textpow this is \\g in 1.9 !?
238
+ regstring.gsub!( /\\k<(.*?)>/ ) { |s| match[$1.to_sym] }
239
+ if Textpow::RUBY_19
240
+ Regexp.new( regstring ).match( string, position )
241
+ else
242
+ Oniguruma::ORegexp.new( regstring ).match( string, position )
210
243
  end
211
-
212
- def match_end string, match, position
213
- regstring = self.end.clone
214
- regstring.gsub!( /\\([1-9])/ ) { |s| match[$1.to_i] }
215
- regstring.gsub!( /\\k<(.*?)>/ ) { |s| match[$1.to_sym] }
216
- Oniguruma::ORegexp.new( regstring ).match( string, position )
244
+ end
245
+
246
+ # find earliest matching pattern
247
+ def match_first_son(string, position)
248
+ return if not patterns
249
+
250
+ earliest_match = nil
251
+ earliest_match_offset = nil
252
+ patterns.each do |pattern|
253
+ next unless match = pattern.match_first(string, position)
254
+
255
+ match_offset = match_offset(match[1]).first
256
+ return match if match_offset == 0 # no need to look any further
257
+
258
+ if not earliest_match or earliest_match_offset > match_offset
259
+ earliest_match = match
260
+ earliest_match_offset = match_offset
261
+ end
217
262
  end
218
-
219
- def match_first_son string, position
220
- match = nil
221
- if self.patterns
222
- self.patterns.each do |p|
223
- tmatch = p.match_first string, position
224
- if tmatch
225
- if ! match || match[1].offset.first > tmatch[1].offset.first
226
- match = tmatch
227
- end
228
- #break if tmatch[1].offset.first == position
229
- end
230
- end
231
- end
232
- match
263
+
264
+ earliest_match
265
+ end
266
+
267
+ def parse_line(stack, line, processor)
268
+ processor.new_line line
269
+ top, match = stack.last
270
+ position = 0
271
+ #@ln ||= 0
272
+ #@ln += 1
273
+ #STDERR.puts @ln
274
+ loop do
275
+ if top.patterns
276
+ pattern, pattern_match = top.match_first_son(line, position)
277
+ end
278
+
279
+ if top.end
280
+ end_match = top.match_end( line, match, position )
281
+ end
282
+
283
+ if end_match and (not pattern_match or match_offset(pattern_match).first >= match_offset(end_match).first)
284
+ pattern_match = end_match
285
+ start_pos = match_offset(pattern_match).first
286
+ end_pos = match_offset(pattern_match).last
287
+
288
+ processor.close_tag top.contentName, start_pos if top.contentName
289
+ parse_captures "captures", top, pattern_match, processor
290
+ parse_captures "endCaptures", top, pattern_match, processor
291
+ processor.close_tag top.name, end_pos if top.name
292
+ stack.pop
293
+ top, match = stack.last
294
+ else
295
+ break unless pattern
296
+
297
+ start_pos = match_offset(pattern_match).first
298
+ end_pos = match_offset(pattern_match).last
299
+
300
+ if pattern.begin
301
+ processor.open_tag pattern.name, start_pos if pattern.name
302
+ parse_captures "captures", pattern, pattern_match, processor
303
+ parse_captures "beginCaptures", pattern, pattern_match, processor
304
+ processor.open_tag pattern.contentName, end_pos if pattern.contentName
305
+ top = pattern
306
+ match = pattern_match
307
+ stack << [top, match]
308
+ elsif pattern.match
309
+ processor.open_tag pattern.name, start_pos if pattern.name
310
+ parse_captures "captures", pattern, pattern_match, processor
311
+ processor.close_tag pattern.name, end_pos if pattern.name
312
+ end
313
+ end
314
+
315
+ position = end_pos
233
316
  end
234
-
235
- def parse_line stack, line, processor
236
- processor.new_line line if processor
237
- top, match = stack.last
238
- position = 0
239
- #@ln ||= 0
240
- #@ln += 1
241
- #STDERR.puts @ln
242
- while true
243
- if top.patterns
244
- pattern, pattern_match = top.match_first_son line, position
245
- else
246
- pattern, pattern_match = nil
247
- end
248
-
249
- end_match = nil
250
- if top.end
251
- end_match = top.match_end( line, match, position )
252
- end
253
-
254
- if end_match && ( ! pattern_match || pattern_match.offset.first >= end_match.offset.first )
255
- pattern_match = end_match
256
- start_pos = pattern_match.offset.first
257
- end_pos = pattern_match.offset.last
258
- processor.close_tag top.contentName, start_pos if top.contentName && processor
259
- parse_captures "captures", top, pattern_match, processor if processor
260
- parse_captures "endCaptures", top, pattern_match, processor if processor
261
- processor.close_tag top.name, end_pos if top.name && processor
262
- stack.pop
263
- top, match = stack.last
264
- else
265
- break unless pattern
266
- start_pos = pattern_match.offset.first
267
- end_pos = pattern_match.offset.last
268
- if pattern.begin
269
- processor.open_tag pattern.name, start_pos if pattern.name && processor
270
- parse_captures "captures", pattern, pattern_match, processor if processor
271
- parse_captures "beginCaptures", pattern, pattern_match, processor if processor
272
- processor.open_tag pattern.contentName, end_pos if pattern.contentName && processor
273
- top = pattern
274
- match = pattern_match
275
- stack << [top, match]
276
- elsif pattern.match
277
- processor.open_tag pattern.name, start_pos if pattern.name && processor
278
- parse_captures "captures", pattern, pattern_match, processor if processor
279
- processor.close_tag pattern.name, end_pos if pattern.name && processor
280
- end
281
- end
282
- position = end_pos
283
- end
317
+ end
318
+
319
+ def match_offset(match)
320
+ if Textpow::RUBY_19
321
+ match.offset(0)
322
+ else
323
+ match.offset
284
324
  end
285
- end
325
+ end
326
+ end
286
327
  end