textpow 0.10.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +7 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +30 -0
- data/{History.txt → History.rdoc} +10 -0
- data/{README.txt → MIT-LICENSE.txt} +0 -30
- data/README.rdoc +82 -0
- data/Rakefile +42 -49
- data/bin/plist2syntax +0 -0
- data/bin/plist2yaml +0 -0
- data/examples/benchmark_js.rb +13 -0
- data/examples/jquery.js +9046 -0
- data/lib/textpow.rb +43 -3
- data/lib/textpow/debug_processor.rb +35 -35
- data/lib/textpow/recording_processor.rb +13 -0
- data/lib/textpow/score_manager.rb +60 -60
- data/lib/textpow/syntax.rb +303 -262
- data/lib/textpow/syntax/broken/markdown.syntax +519 -0
- data/lib/textpow/syntax/broken/php.syntax +1253 -0
- data/lib/textpow/syntax/buggy/nemerle.syntax +74 -0
- data/lib/textpow/syntax/old/YAML.yaml +160 -0
- data/lib/textpow/syntax/old/txt2tags.yaml +139 -0
- data/lib/textpow/syntax/source.actionscript.syntax +97 -0
- data/lib/textpow/syntax/source.active4d.library.syntax +21 -0
- data/lib/textpow/syntax/source.active4d.syntax +276 -0
- data/lib/textpow/syntax/source.ada.syntax +33 -0
- data/lib/textpow/syntax/source.antlr.syntax +151 -0
- data/lib/textpow/syntax/source.apache-config.mod_perl.syntax +50 -0
- data/lib/textpow/syntax/source.apache-config.syntax +191 -0
- data/lib/textpow/syntax/source.applescript.syntax +384 -0
- data/lib/textpow/syntax/source.asp.syntax +70 -0
- data/lib/textpow/syntax/source.asp.vb.net.syntax +129 -0
- data/lib/textpow/syntax/source.c++.qt.syntax +26 -0
- data/lib/textpow/syntax/source.c++.syntax +186 -0
- data/lib/textpow/syntax/source.c-sharp.syntax +59 -0
- data/lib/textpow/syntax/source.c.ragel.syntax +201 -0
- data/lib/textpow/syntax/source.c.syntax +414 -0
- data/lib/textpow/syntax/source.camlp4.ocaml.syntax +36 -0
- data/lib/textpow/syntax/source.cm.syntax +32 -0
- data/lib/textpow/syntax/source.coffee.syntax +216 -0
- data/lib/textpow/syntax/source.context-free.syntax +176 -0
- data/lib/textpow/syntax/source.css.beta.syntax +1925 -0
- data/lib/textpow/syntax/source.css.syntax +195 -0
- data/lib/textpow/syntax/source.d.syntax +142 -0
- data/lib/textpow/syntax/source.diff.syntax +81 -0
- data/lib/textpow/syntax/source.dot.syntax +47 -0
- data/lib/textpow/syntax/source.dylan.syntax +62 -0
- data/lib/textpow/syntax/source.eiffel.syntax +78 -0
- data/lib/textpow/syntax/source.erlang.syntax +922 -0
- data/lib/textpow/syntax/source.fortran.syntax +141 -0
- data/lib/textpow/syntax/source.fscript.syntax +80 -0
- data/lib/textpow/syntax/source.fxscript.syntax +142 -0
- data/lib/textpow/syntax/source.gri.syntax +83 -0
- data/lib/textpow/syntax/source.groovy.groovy.syntax +191 -0
- data/lib/textpow/syntax/source.haskell.syntax +88 -0
- data/lib/textpow/syntax/source.icalendar.syntax +32 -0
- data/lib/textpow/syntax/source.inform.syntax +48 -0
- data/lib/textpow/syntax/source.ini.syntax +55 -0
- data/lib/textpow/syntax/source.io.syntax +81 -0
- data/lib/textpow/syntax/source.java-props.syntax +20 -0
- data/lib/textpow/syntax/source.java.syntax +211 -0
- data/lib/textpow/syntax/source.js.greasemonkey.syntax +34 -0
- data/lib/textpow/syntax/source.js.jquery.syntax +114 -0
- data/lib/textpow/syntax/source.js.mootools.syntax +572 -0
- data/lib/textpow/syntax/source.js.prototype.bracketed.syntax +140 -0
- data/lib/textpow/syntax/source.js.prototype.syntax +72 -0
- data/lib/textpow/syntax/source.js.syntax +256 -0
- data/lib/textpow/syntax/source.js.yui.syntax +176 -0
- data/lib/textpow/syntax/source.json.syntax +136 -0
- data/lib/textpow/syntax/source.lex.syntax +219 -0
- data/lib/textpow/syntax/source.lighttpd-config.syntax +54 -0
- data/lib/textpow/syntax/source.lilypond.syntax +492 -0
- data/lib/textpow/syntax/source.lisp.syntax +61 -0
- data/lib/textpow/syntax/source.logo.syntax +29 -0
- data/lib/textpow/syntax/source.logtalk.syntax +152 -0
- data/lib/textpow/syntax/source.lua.syntax +86 -0
- data/lib/textpow/syntax/source.makefile.syntax +36 -0
- data/lib/textpow/syntax/source.matlab.syntax +142 -0
- data/lib/textpow/syntax/source.mel.syntax +92 -0
- data/lib/textpow/syntax/source.mips.syntax +66 -0
- data/lib/textpow/syntax/source.ml.syntax +121 -0
- data/lib/textpow/syntax/source.modula-3.syntax +47 -0
- data/lib/textpow/syntax/source.nant-build.syntax +53 -0
- data/lib/textpow/syntax/source.objc++.syntax +18 -0
- data/lib/textpow/syntax/source.objc.syntax +233 -0
- data/lib/textpow/syntax/source.ocaml.syntax +764 -0
- data/lib/textpow/syntax/source.ocamllex.syntax +167 -0
- data/lib/textpow/syntax/source.ocamlyacc.syntax +184 -0
- data/lib/textpow/syntax/source.open-gl.syntax +14 -0
- data/lib/textpow/syntax/source.pascal.syntax +77 -0
- data/lib/textpow/syntax/source.pascal.vectorscript.syntax +57 -0
- data/lib/textpow/syntax/source.perl.syntax +1113 -0
- data/lib/textpow/syntax/source.php.cake.syntax +55 -0
- data/lib/textpow/syntax/source.plist.tm-grammar.syntax +708 -0
- data/lib/textpow/syntax/source.postscript.syntax +114 -0
- data/lib/textpow/syntax/source.processing.syntax +106 -0
- data/lib/textpow/syntax/source.prolog.syntax +40 -0
- data/lib/textpow/syntax/source.python.django.syntax +21 -0
- data/lib/textpow/syntax/source.python.syntax +868 -0
- data/lib/textpow/syntax/source.qmake.syntax +114 -0
- data/lib/textpow/syntax/source.quake-config.syntax +32 -0
- data/lib/textpow/syntax/source.r-console.syntax +16 -0
- data/lib/textpow/syntax/source.r.syntax +81 -0
- data/lib/textpow/syntax/source.regexp.oniguruma.syntax +107 -0
- data/lib/textpow/syntax/source.regexp.python.syntax +109 -0
- data/lib/textpow/syntax/source.regexp.syntax +50 -0
- data/lib/textpow/syntax/source.remind.syntax +253 -0
- data/lib/textpow/syntax/source.rez.syntax +80 -0
- data/lib/textpow/syntax/source.ruby.experimental.syntax +145 -0
- data/lib/textpow/syntax/source.ruby.rails.syntax +88 -0
- data/lib/textpow/syntax/source.ruby.syntax +1035 -0
- data/lib/textpow/syntax/source.s5.syntax +69 -0
- data/lib/textpow/syntax/source.sass.syntax +45 -0
- data/lib/textpow/syntax/source.scheme.syntax +347 -0
- data/lib/textpow/syntax/source.scilab.syntax +41 -0
- data/lib/textpow/syntax/source.scss.syntax +527 -0
- data/lib/textpow/syntax/source.shell.syntax +384 -0
- data/lib/textpow/syntax/source.slate.syntax +149 -0
- data/lib/textpow/syntax/source.smarty.syntax +63 -0
- data/lib/textpow/syntax/source.sql.ruby.syntax +18 -0
- data/lib/textpow/syntax/source.sql.syntax +237 -0
- data/lib/textpow/syntax/source.ssh-config.syntax +33 -0
- data/lib/textpow/syntax/source.strings.syntax +39 -0
- data/lib/textpow/syntax/source.swig.syntax +57 -0
- data/lib/textpow/syntax/source.tcl.macports.syntax +163 -0
- data/lib/textpow/syntax/source.tcl.syntax +152 -0
- data/lib/textpow/syntax/source.yaml.syntax +160 -0
- data/lib/textpow/syntax/text.active4d-ini.syntax +50 -0
- data/lib/textpow/syntax/text.bbcode.syntax +287 -0
- data/lib/textpow/syntax/text.bibtex.syntax +151 -0
- data/lib/textpow/syntax/text.blog.html.syntax +41 -0
- data/lib/textpow/syntax/text.blog.markdown.syntax +42 -0
- data/lib/textpow/syntax/text.blog.syntax +27 -0
- data/lib/textpow/syntax/text.blog.textile.syntax +27 -0
- data/lib/textpow/syntax/text.gtdalt.syntax +143 -0
- data/lib/textpow/syntax/text.haml.syntax +88 -0
- data/lib/textpow/syntax/text.html.asp.net.syntax +424 -0
- data/lib/textpow/syntax/text.html.asp.syntax +27 -0
- data/lib/textpow/syntax/text.html.basic.syntax +362 -0
- data/lib/textpow/syntax/text.html.cfm.syntax +119 -0
- data/lib/textpow/syntax/text.html.django.syntax +36 -0
- data/lib/textpow/syntax/text.html.dokuwiki.syntax +204 -0
- data/lib/textpow/syntax/text.html.doxygen.syntax +43 -0
- data/lib/textpow/syntax/text.html.markdown.multimarkdown.syntax +39 -0
- data/lib/textpow/syntax/text.html.mason.syntax +119 -0
- data/lib/textpow/syntax/text.html.mediawiki.syntax +567 -0
- data/lib/textpow/syntax/text.html.mt.syntax +162 -0
- data/lib/textpow/syntax/text.html.ruby.syntax +40 -0
- data/lib/textpow/syntax/text.html.strict.active4d.syntax +311 -0
- data/lib/textpow/syntax/text.html.tcl.syntax +26 -0
- data/lib/textpow/syntax/text.html.textile.syntax +215 -0
- data/lib/textpow/syntax/text.html.tt.syntax +121 -0
- data/lib/textpow/syntax/text.html.twiki.syntax +241 -0
- data/lib/textpow/syntax/text.html.xhtml.1-strict.syntax +4027 -0
- data/lib/textpow/syntax/text.log.latex.syntax +50 -0
- data/lib/textpow/syntax/text.mail.markdown.syntax +118 -0
- data/lib/textpow/syntax/text.man.syntax +17 -0
- data/lib/textpow/syntax/text.moinmoin.syntax +189 -0
- data/lib/textpow/syntax/text.plain.gtd.syntax +22 -0
- data/lib/textpow/syntax/text.plain.release-notes.syntax +46 -0
- data/lib/textpow/syntax/text.plain.syntax +32 -0
- data/lib/textpow/syntax/text.plist.syntax +635 -0
- data/lib/textpow/syntax/text.pmwiki.syntax +113 -0
- data/lib/textpow/syntax/text.restructuredtext.syntax +250 -0
- data/lib/textpow/syntax/text.setext.syntax +147 -0
- data/lib/textpow/syntax/text.subversion-commit.syntax +36 -0
- data/lib/textpow/syntax/text.tabular.csv.syntax +68 -0
- data/lib/textpow/syntax/text.tabular.tsv.syntax +50 -0
- data/lib/textpow/syntax/text.tex.latex.beamer.syntax +41 -0
- data/lib/textpow/syntax/text.tex.latex.haskell.syntax +24 -0
- data/lib/textpow/syntax/text.tex.latex.memoir.syntax +64 -0
- data/lib/textpow/syntax/text.tex.latex.rd.syntax +91 -0
- data/lib/textpow/syntax/text.tex.latex.sweave.syntax +84 -0
- data/lib/textpow/syntax/text.tex.latex.syntax +566 -0
- data/lib/textpow/syntax/text.tex.math.syntax +49 -0
- data/lib/textpow/syntax/text.tex.syntax +86 -0
- data/lib/textpow/syntax/text.txt2tags.syntax +79 -0
- data/lib/textpow/syntax/text.xml.apple-dist.syntax +77 -0
- data/lib/textpow/syntax/text.xml.strict.syntax +92 -0
- data/lib/textpow/syntax/text.xml.syntax +180 -0
- data/lib/textpow/syntax/text.xml.xsl.syntax +60 -0
- data/lib/textpow/version.rb +3 -0
- data/spec/fixtures/objeck.plist +107 -0
- data/spec/fixtures/utf8.txt +1 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/textpow/score_manager_spec.rb +20 -0
- data/spec/textpow/syntax_files_spec.rb +26 -0
- data/spec/textpow/syntax_spec.rb +225 -0
- data/spec/textpow_spec.rb +57 -0
- data/textpow.gemspec +19 -0
- metadata +246 -68
- data/Manifest.txt +0 -13
- data/mm/manual.mm +0 -266
- data/test/test_textpow.rb +0 -25
data/lib/textpow.rb
CHANGED
@@ -1,10 +1,50 @@
|
|
1
1
|
require 'yaml'
|
2
|
-
require 'oniguruma'
|
3
2
|
require 'textpow/syntax'
|
4
3
|
require 'textpow/debug_processor'
|
4
|
+
require 'textpow/recording_processor'
|
5
5
|
require 'textpow/score_manager'
|
6
|
-
|
6
|
+
require 'textpow/version'
|
7
7
|
|
8
8
|
module Textpow
  # Raised when a syntax definition or a scope selector cannot be parsed.
  #
  # NOTE(review): this derives from Exception rather than StandardError, so a
  # bare `rescue` does not catch it. Left unchanged because existing callers
  # may depend on that; confirm before changing the superclass.
  class ParsingError < Exception; end

  # Directory (next to this file) in which syntax definitions are looked up.
  def self.syntax_path
    File.join(File.dirname(__FILE__), 'textpow', 'syntax')
  end

  # Cache of already resolved syntaxes; failed lookups are cached as nil.
  @@syntax = {}

  # Returns the syntax for +syntax_name+, loading it on first use.
  #
  # +syntax_name+ may be a path to a syntax file, a scope name
  # ("source.ruby") or a fuzzy name ("ruby"). Scope and fuzzy names are
  # matched case-insensitively by downcasing them. A name that is an existing
  # file is used verbatim, because downcasing a path breaks the lookup on
  # case-sensitive file systems.
  #
  # Returns nil when nothing matches.
  def self.syntax(syntax_name)
    key = File.file?(syntax_name) ? syntax_name : syntax_name.downcase
    return @@syntax[key] if @@syntax.has_key?(key)

    @@syntax[key] = uncached_syntax(key)
  end

  # The helpers below are internal. They are singleton methods, which a bare
  # `private` keyword does not affect, so they remain callable from outside.

  # Resolves +name+ to a file (explicit path first, then scope name, then
  # fuzzy name) and loads it. Returns nil when no file is found.
  def self.uncached_syntax(name)
    path = find_syntax_by_path(name) ||
           find_syntax_by_scope_name(name) ||
           find_syntax_by_fuzzy_name(name)
    SyntaxNode.load(path) if path
  end

  # Path of "<syntax_path>/<name>.syntax" if that file exists, else nil.
  def self.find_syntax_by_scope_name(name)
    path = File.join(syntax_path, "#{name}.syntax")
    path if File.exist?(path)
  end

  # Shortest path in syntax_path matching "*.<name>.*", or nil.
  def self.find_syntax_by_fuzzy_name(name)
    path = Dir.glob(File.join(syntax_path, "*.#{name}.*")).sort_by(&:size).first
    path if path and File.exist?(path)
  end

  # +path+ itself if it names a regular file, else nil.
  def self.find_syntax_by_path(path)
    path if File.file?(path)
  end
end
|
50
|
+
|
@@ -1,36 +1,36 @@
|
|
1
|
-
module Textpow
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
1
|
+
module Textpow
  # Processor that traces every parser event to STDERR.
  #
  # Each source line is echoed with a "[NNNN] " prefix; opened scopes are
  # printed as "{name" and closed scopes as "}name", shifted right so the
  # brace lines up with the column of the echoed line it refers to.
  class DebugProcessor
    def initialize
      @line_number = 0
      @printable_line = ""
      # Empty until the first new_line call, so open_tag/close_tag can be
      # called before any line was seen without failing on nil.
      @line_marks = ""
    end

    # Writes +string+ into +line+ starting at column +position+, padding
    # +line+ with spaces if it is too short. +line+ is modified in place and
    # returned.
    def pprint(line, string, position = 0)
      width = position + string.size
      line.replace(line.ljust(width, " "))
      line[position, string.size] = string
      line
    end

    # Prints "{name" at +position+, offset by the width of the line prefix.
    def open_tag(name, position)
      STDERR.puts pprint("", "{#{name}", position + @line_marks.size)
    end

    # Prints "}name" at +position+, offset by the width of the line prefix.
    def close_tag(name, position)
      STDERR.puts pprint("", "}#{name}", position + @line_marks.size)
    end

    # Advances the line counter and echoes +line+ with its numbered prefix.
    def new_line(line)
      @line_number += 1
      @line_marks = "[#{@line_number.to_s.rjust(4, '0')}] "
      STDERR.puts "#{@line_marks}#{line}"
    end

    # Prints "{name" when parsing starts.
    def start_parsing(name)
      STDERR.puts "{#{name}"
    end

    # Prints "}name" when parsing ends.
    def end_parsing(name)
      STDERR.puts "}#{name}"
    end
  end
end
|
@@ -1,65 +1,65 @@
|
|
1
1
|
module Textpow
  # Scores how well a scope selector matches a scope.
  #
  # Results of individual selector terms are memoised per instance.
  class ScoreManager
    POINT_DEPTH   = 4
    NESTING_DEPTH = 40
    START_VALUE   = 2 ** ( POINT_DEPTH * NESTING_DEPTH )
    BASE          = 2 ** POINT_DEPTH

    def initialize
      @scores = {}
    end

    # Returns the best score of +search_scope+ against +reference_scope+,
    # or 0 when nothing matches.
    #
    # +search_scope+ is a comma separated list of alternatives. Inside an
    # alternative, terms introduced by a "-" that does not sit on a word
    # boundary (e.g. "source.ruby - string - comment") are exclusions: if any
    # of them matches the reference scope, the alternative is discarded.
    #
    # Raises ParsingError when an alternative is empty.
    def score(search_scope, reference_scope)
      max = 0
      search_scope.split( ',' ).each do |scope|
        arrays = scope.split(/\B-/)
        if arrays.empty?
          raise ParsingError, "Error in scope string: '#{search_scope}' #{arrays.size} is not a valid number of operands"
        end

        # Every exclusion term is checked, not only the first one.
        excluded = arrays[1..-1].any? { |term| score_term( term, reference_scope ) > 0 }
        max = [max, score_term( arrays[0], reference_scope )].max unless excluded
      end
      max
    end

    private

    # Memoised score of one space separated selector term.
    def score_term(search_scope, reference_scope)
      unless @scores[reference_scope] && @scores[reference_scope][search_scope]
        @scores[reference_scope] ||= {}
        @scores[reference_scope][search_scope] = score_array( search_scope.split(' '), reference_scope.split( ' ' ) )
      end
      @scores[reference_scope][search_scope]
    end

    # Matches the selector elements against the scope elements, walking both
    # from the last element towards the first. A scope element matches when it
    # starts with the selector element. Each match adds a point score weighted
    # by a multiplier that is divided by BASE for every scope element walked.
    # Returns 0 unless every selector element was matched.
    #
    # Both arrays are consumed (popped) while scoring.
    def score_array(search_array, reference_array)
      pending = search_array
      current = reference_array.last
      reg = Regexp.new( "^#{Regexp.escape( pending.last )}" )
      multiplier = START_VALUE
      result = 0
      while pending.size > 0 && current
        if reg =~ current
          point_score = (2**POINT_DEPTH) - current.count( '.' ) + Regexp.last_match[0].count( '.' )
          result += point_score * multiplier
          pending.pop
          reg = Regexp.new( "^#{Regexp.escape( pending.last )}" ) if pending.size > 0
        end
        multiplier = multiplier / BASE
        reference_array.pop
        current = reference_array.last
      end
      result = 0 if pending.size > 0
      result
    end
  end
end
|
data/lib/textpow/syntax.rb
CHANGED
@@ -1,286 +1,327 @@
|
|
1
|
-
|
1
|
+
module Textpow
  # True when running on Ruby 1.9.0 or newer. The code below uses it to
  # choose between the built-in Regexp and the oniguruma gem.
  #
  # The version is compared component by component as integers; comparing the
  # raw strings is lexical and also left out 1.9.0 itself.
  RUBY_19 = ((RUBY_VERSION.split('.').map { |part| part.to_i } <=> [1, 9, 0]) >= 0)
end
|
4
|
+
require 'oniguruma' unless Textpow::RUBY_19
|
2
5
|
|
3
6
|
module Textpow
|
7
|
+
# At load time we do not know all patterns / all syntaxes,
# so we store a proxy that tries to find the correct syntax at runtime.
#
# The include name decides what the proxy stands for:
#   "#name"          - entry "name" of the owning syntax' repository
#   "$self", "$base" - the owning syntax itself
#   anything else    - another syntax, looked up by name
class SyntaxProxy
  # included_name:: value of the "include" key that created this proxy
  # syntax::        syntax in which the include appeared
  def initialize(included_name, syntax)
    @syntax = syntax
    @included_name = included_name
  end

  # Forwards every call to the resolved target. Resolution is retried on each
  # call until it succeeds. When the target cannot be resolved the call
  # returns nil and, under $DEBUG, a message is written to STDERR.
  def method_missing(method, *args, &block)
    if @proxy ||= proxy
      @proxy.send(method, *args, &block)
    else
      # Name the include that failed to resolve.
      STDERR.puts "Failed proxying #{@included_name}.#{method}(#{args.join(', ')})" if $DEBUG
    end
  end

  private

  # Resolves the include name to its target; nil (or false) when not found.
  def proxy
    case @included_name
    when /^#/
      @syntax.repository and @syntax.repository[@included_name[1..-1]]
    when "$self", "$base"
      @syntax
    else
      @syntax.syntaxes[@included_name] || Textpow.syntax(@included_name)
    end
  end
end
|
36
|
+
|
37
|
+
# One node of a syntax definition: the root grammar itself or one of its
# nested patterns / repository entries.
#
# A node is built from a table (Hash) read from a syntax file. The root node
# parses text with #parse and reports scopes to a processor object through
# start_parsing, new_line, open_tag, close_tag and end_parsing.
class SyntaxNode
  # Syntaxes that registered a scope name, grouped by name space, so that
  # includes can find them.
  @@syntaxes = {}

  attr_accessor :syntax
  attr_accessor :firstLineMatch
  attr_accessor :foldingStartMarker
  attr_accessor :foldingStopMarker
  attr_accessor :match
  attr_accessor :begin
  attr_accessor :content
  attr_accessor :fileTypes
  attr_accessor :name
  attr_accessor :contentName
  attr_accessor :end
  attr_accessor :scopeName
  attr_accessor :keyEquivalent
  attr_accessor :captures
  attr_accessor :beginCaptures
  attr_accessor :endCaptures
  attr_accessor :repository
  attr_accessor :patterns

  # Reads the syntax file +file+ and builds the root node from it.
  # +options+ is passed on to #initialize.
  def self.load(file, options={})
    table = convert_file_to_table(file)
    SyntaxNode.new(table, options)
  end

  # table::   Hash describing this node
  # options:: :syntax     - root node this node belongs to (default: self)
  #           :name_space - key under which syntaxes are registered
  def initialize(table, options={})
    @syntax = options[:syntax] || self
    @name_space = options[:name_space]

    register_in_syntaxes(table["scopeName"])
    parse_and_store_syntax_info(table)
  end

  # Syntaxes registered in this node's name space, keyed by scope name.
  def syntaxes
    @@syntaxes[@name_space]
  end

  # Parses +string+ line by line, reporting every event to +processor+.
  # Returns the processor.
  def parse(string, processor = RecordingProcessor.new)
    processor.start_parsing scopeName
    stack = [[self, nil]]
    string.each_line do |line|
      parse_line stack, line, processor
    end
    processor.end_parsing scopeName

    processor
  end

  protected

  # Copies the entries of +table+ into instance variables, compiling regexes
  # and building child nodes on the way. Unknown keys are ignored (and
  # reported on STDERR under $DEBUG).
  def parse_and_store_syntax_info(table)
    table.each do |key, value|
      case key
      when "firstLineMatch", "foldingStartMarker", "foldingStopMarker", "match", "begin"
        instance_variable_set("@#{key}", parse_regex(value))
      when "content", "fileTypes", "name", "contentName", "end", "scopeName", "keyEquivalent"
        # "end" stays a String: it is compiled per match in match_end.
        instance_variable_set("@#{key}", value)
      when "captures", "beginCaptures", "endCaptures"
        instance_variable_set("@#{key}", value.sort)
      when "repository"
        parse_repository value
      when "patterns"
        create_children value
      when "comment"
        # nothing to store
      else
        # to_s: the ignored value is not necessarily a String
        STDERR.puts "Ignoring: #{key} => #{value.to_s.gsub("\n", "\n>>")}" if $DEBUG
      end
    end
  end

  # Compiles +value+ with the regex engine of the running Ruby.
  # Raises ParsingError when the pattern is invalid.
  def parse_regex(value)
    if Textpow::RUBY_19
      parse_regex_with_invalid_chars(value)
    else
      Oniguruma::ORegexp.new(value, :options => Oniguruma::OPTION_CAPTURE_GROUP)
    end
  rescue RegexpError, ArgumentError => e
    raise ParsingError, "Parsing error in #{value}: #{e.to_s}"
  end

  # Compiles +value+ as UTF-8. Patterns rejected for encoding reasons are
  # replaced by the placeholder /INVALID_UTF8/ and reported on STDERR; any
  # other RegexpError is re-raised.
  def parse_regex_with_invalid_chars(value)
    # dup: do not change the encoding of the caller's string (and cope with
    # frozen strings)
    Regexp.new(value.dup.force_encoding('UTF-8'))
  rescue RegexpError => e
    if e.message =~ /UTF-8/ or e.message =~ /invalid multibyte escape/
      STDERR.puts "Ignored utf8 regex error #{e}"
      /INVALID_UTF8/
    else
      raise e
    end
  end

  # register in global syntax list -> can be found by include
  def register_in_syntaxes(scope)
    @@syntaxes[@name_space] ||= {}
    @@syntaxes[@name_space][scope] = self if scope
  end

  # Reads +file+ into a Hash: property lists (.tmSyntax / .plist) through the
  # plist library, everything else as YAML.
  # Raises RuntimeError when the file does not exist.
  def self.convert_file_to_table(file)
    raise "File not found: #{file}" unless File.exist?(file)
    case file
    when /(\.tmSyntax|\.plist)$/
      require 'plist'
      Plist::parse_xml(file)
    else
      YAML.load_file(file)
    end
  end

  # Builds @repository: includes become proxies, other entries child nodes.
  def parse_repository(repository)
    @repository = {}
    repository.each do |key, value|
      if value["include"]
        @repository[key] = SyntaxProxy.new(value["include"], syntax)
      else
        @repository[key] = SyntaxNode.new(value, :syntax => syntax, :name_space => @name_space)
      end
    end
  end

  # Builds @patterns: includes become proxies, other entries child nodes.
  def create_children(patterns)
    @patterns = patterns.map do |pattern|
      if pattern["include"]
        SyntaxProxy.new(pattern["include"], syntax)
      else
        SyntaxNode.new(pattern, :syntax => syntax, :name_space => @name_space)
      end
    end
  end

  # Reports the capture groups of +match+ that +pattern+ names under +name+
  # ("captures", "beginCaptures" or "endCaptures") as open/close tags.
  # Groups that did not take part in the match or matched nothing are skipped.
  def parse_captures(name, pattern, match, processor)
    captures = pattern.match_captures( name, match )
    captures.reject! { |_group, range, _scope| ! range.first || range.first == range.last }
    starts = []
    ends   = []
    captures.each do |group, range, scope|
      starts << [range.first, group, scope]
      ends   << [range.last, -group, scope]
    end

    # STDERR.puts '-' * 100
    # starts.sort!.reverse!.each{|c| STDERR.puts c.join(', ')}
    # STDERR.puts
    # ends.sort!.reverse!.each{|c| STDERR.puts c.join(', ')}
    starts.sort!.reverse!
    ends.sort!.reverse!

    # Both lists are sorted descending, so pop yields the smallest entry.
    while ! starts.empty? || ! ends.empty?
      if starts.empty?
        pos, _key, scope = ends.pop
        processor.close_tag scope, pos
      elsif ends.empty?
        pos, _key, scope = starts.pop
        processor.open_tag scope, pos
      elsif ends.last[1].abs < starts.last[1]
        pos, _key, scope = ends.pop
        processor.close_tag scope, pos
      else
        pos, _key, scope = starts.pop
        processor.open_tag scope, pos
      end
    end
  end

  # Returns [group_index, [start, end], scope_name] for every entry of the
  # capture list stored under +name+ that refers to a group of +match+.
  def match_captures(name, match)
    matches = []
    captures = instance_variable_get "@#{name}"
    if captures
      captures.each do |key, value|
        if key =~ /^\d*$/
          matches << [key.to_i, match.offset( key.to_i ), value["name"]] if key.to_i < match.size
        elsif Textpow::RUBY_19
          # MatchData has no to_index; ask the regexp for the group number
          index = (match.regexp.named_captures[key.to_s] || []).first
          matches << [index, match.offset( index ), value["name"]] if index
        else
          matches << [match.to_index( key.to_sym ), match.offset( key.to_sym), value["name"]] if match.to_index( key.to_sym )
        end
      end
    end
    matches
  end

  # Tries this node at +position+ of +string+. Returns [node, match] or nil.
  # A node with neither "match", "begin" nor "end" delegates to its patterns.
  def match_first(string, position)
    if self.match
      if found = self.match.match( string, position )
        return [self, found]
      end
    elsif self.begin
      if found = self.begin.match( string, position )
        return [self, found]
      end
    elsif self.end
      # an "end" without "begin" never matches
    else
      return match_first_son( string, position )
    end
    nil
  end

  # Matches this node's "end" pattern from +position+ on. Back references
  # (\1..\9 and \k<name>) are replaced by the text captured in +match+, the
  # match of the "begin" pattern. The text is escaped so it matches itself.
  def match_end(string, match, position)
    regstring = self.end.clone
    regstring.gsub!( /\\([1-9])/ ) { Regexp.escape( match[Regexp.last_match(1).to_i].to_s ) }

    # in spox-textpow this is \\g in 1.9 !?
    regstring.gsub!( /\\k<(.*?)>/ ) { Regexp.escape( match[Regexp.last_match(1).to_sym].to_s ) }
    if Textpow::RUBY_19
      Regexp.new( regstring ).match( string, position )
    else
      Oniguruma::ORegexp.new( regstring ).match( string, position )
    end
  end

  # find earliest matching pattern
  # Returns [pattern, match] or nil; of several matches starting at the same
  # offset the first pattern wins.
  def match_first_son(string, position)
    return if not patterns

    earliest_match = nil
    earliest_match_offset = nil
    patterns.each do |pattern|
      next unless candidate = pattern.match_first(string, position)

      candidate_offset = match_offset(candidate[1]).first
      return candidate if candidate_offset == 0 # no need to look any further

      if not earliest_match or earliest_match_offset > candidate_offset
        earliest_match = candidate
        earliest_match_offset = candidate_offset
      end
    end

    earliest_match
  end

  # Parses one +line+. +stack+ holds [node, begin_match] pairs of the rules
  # that are currently open and is carried over from line to line.
  #
  # NOTE(review): position is set to the end of the last match. A pattern
  # that keeps matching the empty string at the same position looks like it
  # would never leave the loop - confirm with a zero-width pattern.
  def parse_line(stack, line, processor)
    processor.new_line line
    top, match = stack.last
    position = 0
    #@ln ||= 0
    #@ln += 1
    #STDERR.puts @ln
    loop do
      # pattern, pattern_match and end_match are local to the block and
      # therefore nil again at the start of every iteration.
      if top.patterns
        pattern, pattern_match = top.match_first_son(line, position)
      end

      if top.end
        end_match = top.match_end( line, match, position )
      end

      if end_match and (not pattern_match or match_offset(pattern_match).first >= match_offset(end_match).first)
        # the open rule ends before (or where) the next pattern starts
        pattern_match = end_match
        start_pos = match_offset(pattern_match).first
        end_pos = match_offset(pattern_match).last

        processor.close_tag top.contentName, start_pos if top.contentName
        parse_captures "captures", top, pattern_match, processor
        parse_captures "endCaptures", top, pattern_match, processor
        processor.close_tag top.name, end_pos if top.name
        stack.pop
        top, match = stack.last
      else
        break unless pattern

        start_pos = match_offset(pattern_match).first
        end_pos = match_offset(pattern_match).last

        if pattern.begin
          # begin/end rule: stays open until its "end" pattern matches
          processor.open_tag pattern.name, start_pos if pattern.name
          parse_captures "captures", pattern, pattern_match, processor
          parse_captures "beginCaptures", pattern, pattern_match, processor
          processor.open_tag pattern.contentName, end_pos if pattern.contentName
          top = pattern
          match = pattern_match
          stack << [top, match]
        elsif pattern.match
          processor.open_tag pattern.name, start_pos if pattern.name
          parse_captures "captures", pattern, pattern_match, processor
          processor.close_tag pattern.name, end_pos if pattern.name
        end
      end

      position = end_pos
    end
  end

  # [start, end] of the whole match, for either regex engine.
  def match_offset(match)
    if Textpow::RUBY_19
      match.offset(0)
    else
      match.offset
    end
  end
end
|
286
327
|
end
|