noratext 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +74 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/lib/noratext.rb +4 -0
- data/lib/noratext/lexer.rb +153 -0
- data/lib/noratext/parser.rb +43 -0
- data/lib/noratext/parser_element.rb +189 -0
- data/lib/noratext/xmly_lexer.rb +38 -0
- data/noratext.gemspec +65 -0
- data/spec/noratext_lexer_spec.rb +82 -0
- data/spec/noratext_parser_spec.rb +87 -0
- data/spec/noratext_spec.rb +5 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/ydml_grammer_definition.rb +95 -0
- metadata +104 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 KOJIMA Satoshi
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
= noratext
|
2
|
+
|
3
|
+
* http://github.com/skoji/noratext
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
noratext is a simple (and rather stupid) lexer and parser generator for loosely marked-up text.
|
8
|
+
The current version supports XML-like markup that does not have to be valid XML (like old HTML).
|
9
|
+
|
10
|
+
will support wiki-style markup soon.
|
11
|
+
|
12
|
+
== FEATURES
|
13
|
+
|
14
|
+
* Noratext::Lexer/Noratext::Parser provide functionality to parse marked-up text.
|
15
|
+
|
16
|
+
== PROBLEMS
|
17
|
+
|
18
|
+
* Only XML-style tags are supported.
|
19
|
+
|
20
|
+
* Rules currently have to be duplicated for the Lexer and the Parser; e.g., the same tag must be defined for both.
|
21
|
+
|
22
|
+
== SYNOPSIS
|
23
|
+
|
24
|
+
=== lexer : style A
|
25
|
+
# define lexer :my_ml
|
26
|
+
Noratext::Lexer.define :my_ml, :xml_style do
|
27
|
+
symbols :chapter, :section, :strong, :center, :right, :'font-size', :blockquote, :code, :change_paragraph
|
28
|
+
|
29
|
+
without_close :change_paragraph
|
30
|
+
match_pattern :change_paragraph, 'p' # tag is <p> , not <change-paragraph>
|
31
|
+
rawtext_till_close :code
|
32
|
+
|
33
|
+
# add attribute parser
|
34
|
+
add_parser :'font-size' do
|
35
|
+
|s|
|
36
|
+
/size="(.*?)"/ =~ s
|
37
|
+
{ :size => $1 }
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# use the lexer
|
42
|
+
Noratext::Lexer[:my_ml].process(io)
|
43
|
+
|
44
|
+
=== lexer : style B
|
45
|
+
# define symbols
|
46
|
+
Noratext::Lexer.define :my_ml, :xml_style do
|
47
|
+
symbol :chapter
|
48
|
+
symbol :section
|
49
|
+
symbol :strong
|
50
|
+
symbol :center
|
51
|
+
symbol :right
|
52
|
+
symbol :blockquote
|
53
|
+
symbol :'font-size' do
|
54
|
+
add_parser do
|
55
|
+
|s|
|
56
|
+
/size="(.*?)"/ =~ s
|
57
|
+
{ :size => $1 }
|
58
|
+
end
|
59
|
+
end
|
60
|
+
symbol :code do
|
61
|
+
rawtext_till_close
|
62
|
+
end
|
63
|
+
symbol :change_paragraph do
|
64
|
+
without_close
|
65
|
+
match_pattern 'p'
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# use the lexer
|
70
|
+
Noratext::Lexer[:my_ml].process(io)
|
71
|
+
|
72
|
+
== Copyright
|
73
|
+
|
74
|
+
Copyright (c) 2010 KOJIMA Satoshi. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "noratext"
|
8
|
+
gem.summary = %Q{ noratext: simple lexer/parser generator for markuped text}
|
9
|
+
gem.description = %Q{noratext is a simple (and rather stupid) lexer and parser generator for loosly markuped text. }
|
10
|
+
gem.email = "skoji@mac.com"
|
11
|
+
gem.homepage = "http://github.com/skoji/noratext"
|
12
|
+
gem.authors = ["KOJIMA Satoshi"]
|
13
|
+
gem.add_development_dependency "rspec", ">= 1.2.9"
|
14
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
|
+
end
|
16
|
+
Jeweler::GemcutterTasks.new
|
17
|
+
rescue LoadError
|
18
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'spec/rake/spectask'
|
22
|
+
Spec::Rake::SpecTask.new(:spec) do |spec|
|
23
|
+
spec.libs << 'lib' << 'spec'
|
24
|
+
spec.spec_files = FileList['spec/**/*_spec.rb']
|
25
|
+
end
|
26
|
+
|
27
|
+
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
28
|
+
spec.libs << 'lib' << 'spec'
|
29
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
30
|
+
spec.rcov = true
|
31
|
+
end
|
32
|
+
|
33
|
+
task :spec => :check_dependencies
|
34
|
+
|
35
|
+
task :default => :spec
|
36
|
+
|
37
|
+
require 'rake/rdoctask'
|
38
|
+
Rake::RDocTask.new do |rdoc|
|
39
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
40
|
+
|
41
|
+
rdoc.rdoc_dir = 'rdoc'
|
42
|
+
rdoc.title = "noratext #{version}"
|
43
|
+
rdoc.rdoc_files.include('README*')
|
44
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
45
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.0
|
data/lib/noratext.rb
ADDED
@@ -0,0 +1,153 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Noratext
  # Line-oriented lexer for loosely marked-up text.
  #
  # A lexer is described once with Lexer.define (a DSL block that declares
  # the known tags) and a fresh, independent instance is built from that
  # description on every Lexer[name] call.  Concrete subclasses must provide
  # +tag_class+ (the Tag subclass to instantiate) and +closetag_for+ (the
  # text that closes a given tag); neither is defined in this class.
  #
  # #process turns an IO into an array of token hashes:
  #   { :type => :text,    :data => "...", :line => n }
  #   { :type => tag_name, :data => "<raw tag text>", :line => n, :tag => {...} }
  class Lexer
    # name => { :lexer_class => Class, :block => Proc }
    @instances_block = {}

    # Registers a lexer definition under +name+.
    #
    # name::  key used later with Lexer[name]
    # style:: markup style; only :xml_style is supported
    # block:: DSL block, evaluated in the context of a lexer instance
    #
    # Raises ArgumentError when no block is given or the style is unknown
    # (previously an unknown style evaluated the block on +nil+).
    def self.define(name, style = :xml_style, &block)
      raise ArgumentError, 'a definition block is required' unless block

      lexer_class =
        case style
        when :xml_style then XmlyLexer
        else raise ArgumentError, "unsupported lexer style: #{style.inspect}"
        end

      lexer = lexer_class.new
      @instances_block[name] = { :lexer_class => lexer_class, :block => block }
      lexer.instance_eval(&block) # for check only
    end

    # Builds a new lexer instance from the definition registered as +name+.
    # Raises ArgumentError when no such definition exists.
    def self.[](name)
      definition = @instances_block[name]
      raise ArgumentError, "lexer #{name.inspect} is not defined" unless definition

      lexer = definition[:lexer_class].new
      lexer.instance_eval(&definition[:block])
      lexer
    end

    def initialize
      @tags = {}
      @rawtext_tag = nil
      @rawtext_close_tag = nil
    end

    # Tokenizes every line read from +io+ (anything responding to +gets+ and
    # +lineno+) and returns the tokens in reading order.
    def process(io)
      result = []
      while (line = io.gets)
        # concat appends in place instead of copying the whole result per line
        result.concat(read_line(line, io.lineno))
      end
      result
    end

    # Declares several tags at once, each with default settings.
    def symbols(*symbols)
      symbols.each { |symbol| @tags[symbol] = build_tag(symbol) }
    end

    # Declares a single tag.  The optional block is evaluated in the context
    # of the new Tag, so Tag methods (without_close, match_pattern, ...) can
    # be called directly inside it.
    def symbol(symbol, &block)
      tag = @tags[symbol] = build_tag(symbol)
      # instance_eval without a block raises, so a bare `symbol :name` must
      # skip the evaluation.
      block ? tag.instance_eval(&block) : tag
    end

    # Overrides the text used to recognize +tag+ in the input.
    def match_pattern(tag, pattern)
      tag_for(tag).match_pattern pattern
    end

    # Marks the given tags as having no closing counterpart.
    def without_close(*tags)
      tags.each { |tag| tag_for(tag).without_close }
    end

    # Adds an attribute parser to +tag+.  The block receives the raw tag text
    # and returns a hash that is merged into the token's :tag hash.
    def add_parser(tag, &block)
      tag_for(tag).attribute_parsers << block
    end

    # Makes everything after +tag+ plain text until +closetag+ is seen.
    # +closetag+ defaults to whatever the lexer's closetag_for returns.
    def rawtext_till_close(tag, closetag = nil)
      closetag ||= closetag_for(tag)
      tag_for(tag).rawtext_till_close closetag
    end

    # Tokenizes one line (or the remainder of one).  Returns an array of
    # token hashes; an empty string yields no tokens.
    def read_line(s, line_no)
      return [] if s == ""
      return read_rawtext(s, line_no) if @rawtext_tag

      t = factory(s)
      return [text_token(s, line_no)] if t.nil?

      result = []
      result << text_token(t[:pre], line_no) if t[:pre] != ""
      result << { :type => t[:tag][:name], :data => t[:data], :line => line_no, :tag => t[:tag] }
      result + read_line(t[:rest], line_no)
    end

    # Finds the tag that matches earliest in +s+.
    #
    # Returns nil when no tag matches, otherwise a hash with the text before
    # the tag (:pre), after it (:rest), the matched text (:data) and the tag
    # description (:tag).  Side effect: when the matched tag was declared
    # with rawtext_till_close, the lexer switches to raw-text mode.
    def factory(s)
      candidates = @tags.map { |_name, tag| { :tag => tag, :match => tag.matcher(s) } }
      candidates = candidates.reject { |candidate| candidate[:match].nil? }
      return nil if candidates.empty?

      # Only the leftmost match is needed, so there is no reason to sort.
      first = candidates.min_by { |candidate| candidate[:match].begin(0) }
      m = first[:match]
      tag = first[:tag]

      @rawtext_close_tag = tag.rawtext_till_close_tag
      @rawtext_tag = tag if @rawtext_close_tag
      { :pre => m.pre_match,
        :rest => m.post_match,
        :data => m[0],
        :tag => { :name => tag.name }.merge(tag.parse_attribute(m[0])) }
    end

    # Creates a tag and lets it reach this lexer's closetag_for, so that
    # Tag#rawtext_till_close works without an explicit closing tag.
    def build_tag(symbol)
      tag = tag_class.new(symbol)
      if respond_to?(:closetag_for, true) && tag.respond_to?(:closetag_builder=)
        tag.closetag_builder = method(:closetag_for)
      end
      tag
    end
    private :build_tag

    # Looks up a declared tag, failing with a readable error otherwise.
    def tag_for(name)
      @tags[name] or raise ArgumentError, "tag #{name.inspect} is not defined"
    end
    private :tag_for

    # Builds a plain text token.
    def text_token(data, line_no)
      { :type => :text, :data => data, :line => line_no }
    end
    private :text_token

    # Handles a line while in raw-text mode: everything is text until the
    # close tag (interpolated as regexp source) is found, after which normal
    # tokenizing resumes on the rest of the line.
    def read_rawtext(s, line_no)
      matched = /#{@rawtext_close_tag}/.match(s)
      return [text_token(s, line_no)] if matched.nil?

      tag = @rawtext_tag
      closing = matched[0]
      result = [
        text_token(matched.pre_match, line_no),
        { :type => tag.name,
          :data => closing,
          :line => line_no,
          :tag => { :name => tag.name }.merge(tag.parse_attribute(closing)) }
      ]

      @rawtext_tag = nil
      @rawtext_close_tag = nil
      result + read_line(matched.post_match, line_no)
    end
    private :read_rawtext

    # Description of one tag.  Subclasses supply +matcher(s)+, which returns
    # a MatchData (or nil) for the first occurrence of the tag in +s+.
    class Tag
      attr_accessor :name, :attribute_parsers, :rawtext_till_close_tag
      # Callable returning the closing-tag text for a tag name; set by the
      # owning lexer.
      attr_writer :closetag_builder

      def initialize(name)
        @name = name
        @match_pattern = name.to_s
        @attribute_parsers = []
        @with_close = true
        @rawtext_till_close_tag = nil
        @closetag_builder = nil
      end

      # Declares that this tag has no closing counterpart.
      def without_close
        @with_close = false
      end

      # Makes the lexer treat everything after this tag as text until +tag+
      # (the closing text) is seen.  Defaults to the owning lexer's closing
      # text for this tag's name.
      def rawtext_till_close(tag = nil)
        tag ||= closetag_for(@name)
        @rawtext_till_close_tag = tag
      end

      # Closing text for +name+, delegated to the owning lexer.
      def closetag_for(name)
        raise "no closing tag is known for #{name.inspect}; pass it explicitly" unless @closetag_builder

        @closetag_builder.call(name)
      end

      # Adds an attribute parser (see Lexer#add_parser).
      def add_parser(&block)
        @attribute_parsers << block
      end

      # Overrides the text used to recognize this tag.
      def match_pattern(pattern)
        @match_pattern = pattern
      end

      # Runs every attribute parser on the raw tag text +s+ and merges the
      # resulting hashes; later parsers win on duplicate keys.
      def parse_attribute(s)
        result = {}
        @attribute_parsers.each { |parser| result.merge!(parser.call(s)) }
        result
      end
    end
  end
end
|
153
|
+
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Noratext
  # Parser built from a grammar of named elements.
  #
  # A grammar is registered once with Parser.define; Parser[name] builds a
  # fresh parser from it.  The first element declared in the grammar is the
  # start element.  Problems found while parsing are collected and can be
  # read back with #log.
  class Parser
    # name => grammar definition block
    @instances_block = {}

    # Registers the grammar +block+ under +name+.  The block is evaluated in
    # the context of a parser instance, so +element+ can be called directly.
    # +style+ is accepted for symmetry with Lexer.define and is not used.
    #
    # Raises ArgumentError when no block is given.
    def self.define(name, style = :xml_style, &block)
      raise ArgumentError, 'a definition block is required' unless block

      parser = Parser.new
      @instances_block[name] = block
      parser.instance_eval(&block) # for check only
    end

    # Builds a new parser from the grammar registered as +name+.
    # Raises ArgumentError when no such grammar exists.
    def self.[](name)
      definition = @instances_block[name]
      raise ArgumentError, "parser #{name.inspect} is not defined" unless definition

      parser = Parser.new
      parser.instance_eval(&definition)
      parser
    end

    # Declares a grammar element.  The block is evaluated in the context of
    # the new Element.  The first declared element becomes the start element.
    def element(name, &block)
      element = Element.new(name, method(:logger), @elements)
      @start_element = element if @elements.size == 0
      @elements[name] = element
      element.instance_eval(&block)
    end

    def initialize
      @elements = {}
      @log = []
    end

    # Parses +sequence+ (an array of token hashes, consumed destructively)
    # starting from the start element.  A token left over afterwards is
    # recorded in the log as unexpected.
    def parse(sequence)
      result = @start_element.process(sequence)
      logger sequence[0][:line], "unexpected #{sequence[0][:data]}" if sequence.size > 0
      result
    end

    # Records a log message for the given line number.
    def logger(lineno, log)
      @log << { :lineno => lineno, :log => log }
    end

    # Returns the recorded messages as "lineno: message" strings ordered by
    # line number.  Entries without a line number sort first, and entries on
    # the same line keep the order in which they were recorded.
    def log
      # Sorting on [line, position] keeps the order deterministic and avoids
      # comparing nil with an Integer, which would raise.
      order = (0...@log.size).sort_by { |index| [@log[index][:lineno] || 0, index] }
      order.map do |index|
        entry = @log[index]
        "#{entry[:lineno]}: #{entry[:log]}"
      end
    end
  end
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
module Noratext
  class Parser
    # Behaviour for an element made of zero or more child elements.
    # Mixed into an Element by Element#contains.
    module Contains
      # Child elements, resolved lazily from the shared element table.
      def element_to_parse
        @element_to_parse ||= @contains.map { |element_name| @elements[element_name] }
      end

      # Consumes as many children as possible from +sequence+ and returns a
      # ParsedData node holding them.
      def process(sequence)
        preprocess(sequence)
        children = []
        while sequence.size > 0 && (elem = process_one_element(sequence))
          children << elem
        end
        postprocess(sequence)
        ParsedData.new(@name, children)
      end
    end

    # Behaviour for an element that is exactly one of several alternatives.
    # Mixed into an Element by Element#is_oneof.
    module IsOneof
      # Alternative elements, resolved lazily from the shared element table.
      def element_to_parse
        @element_to_parse ||= @is_oneof.map { |element_name| @elements[element_name] }
      end

      # Returns the parsed alternative, or nil when none accepts the next
      # token (or the sequence is empty).
      def process(sequence)
        return nil unless sequence.size > 0

        process_one_element(sequence) || nil
      end
    end

    # Behaviour for an element delimited by an open tag and a close tag of
    # the element's own name.  Mixed in by Element#open_close.
    module OpenClose
      def accept?(token)
        is_opentag(token)
      end

      # Consumes the open tag, remembering its tag hash and source line.
      def preprocess(sequence)
        token = sequence.shift
        @opentag = token[:tag]
        # The line number lives on the token, not inside its :tag hash.
        @opentag_line = token[:line]
      end

      def is_end_of_element(token)
        is_closetag(token)
      end

      # Consumes the matching close tag.  When it is missing - because
      # another token follows or the input ended - the element is reported
      # as not closed at the line of its open tag.
      def postprocess(sequence)
        if sequence.size > 0 && is_closetag(sequence[0])
          sequence.shift
          return
        end

        log @opentag_line, "#{@name} is not closed"
      end
    end

    # Behaviour for a leaf element built from a single token.
    # Mixed in by Element#parse_token.
    module ParseToken
      # Shifts one token and turns the hash returned by the user block into
      # attributes of the resulting node.
      def process(sequence)
        ParsedData.new(@name).set_attributes(@parse_token_proc.call(sequence.shift))
      end
    end

    # Behaviour for a leaf element whose user block consumes tokens itself.
    # Mixed in by Element#parse_sequence.
    module ParseSequence
      # Hands the whole remaining sequence to the user block; the block is
      # responsible for shifting what it uses.
      def process(sequence)
        ParsedData.new(@name).set_attributes(@parse_sequence_proc.call(sequence))
      end
    end

    # One grammar element.  Its parsing behaviour is chosen by the DSL
    # methods (contains, is_oneof, open_close, parse_token, parse_sequence),
    # each of which extends the instance with the matching module.
    class Element
      # name::     element name, also the token type it reacts to
      # logger::   callable taking (lineno, message), or nil to discard logs
      # elements:: table of all elements of the grammar, shared by reference
      def initialize(name, logger, elements)
        @name = name
        @logger = logger
        @elements = elements
      end

      # True when this element can start at +token+: either the token type
      # equals the accepted type, or one of the child/alternative elements
      # accepts it.
      def accept?(token)
        return token[:type] == @accept_type if @accept_type

        element_to_parse.any? { |element| element.accept?(token) }
      end

      def is_closetag(token)
        token[:type] == @name &&
          token[:tag][:kind] == :closetag
      end

      def is_opentag(token)
        token[:type] == @name &&
          token[:tag][:kind] == :opentag
      end

      # Hook run before children are parsed; no-op by default.
      def preprocess(sequence)
      end

      # Hook run after children are parsed; no-op by default.
      def postprocess(sequence)
      end

      def is_end_of_element(token)
        false
      end

      # Parses one child from the head of +sequence+ using the first element
      # that accepts the next token.  Returns nil when nothing accepts it.
      def process_one_element(sequence)
        # NOTE(review): this loop tests :kind on the token itself, whereas
        # the rest of this file reads the kind from token[:tag][:kind], so
        # with such tokens the loop body never runs.  It is left unchanged on
        # purpose: a nested element without open_close would otherwise
        # consume the close tag that belongs to its parent.
        while sequence.size > 0 && sequence[0][:kind] == :closetag
          return nil if is_end_of_element(sequence[0])

          log sequence[0][:line], "no opentag for #{sequence[0][:type]}"
          sequence.shift
        end
        return nil if sequence.size == 0

        element_to_parse.each do |element|
          return element.process(sequence) if element.accept?(sequence[0])
        end
        nil
      end

      # DSL: this element is a sequence of the named child elements.
      def contains(*array)
        raise 'already is_oneof are defined' if !@is_oneof.nil?

        @contains = array
        extend Contains
      end

      # DSL: this element is exactly one of the named elements.
      def is_oneof(*array)
        raise 'already contains are defined' if !@contains.nil?

        @is_oneof = array
        extend IsOneof
      end

      # DSL: this element is wrapped in an open tag and a close tag.
      def open_close
        extend OpenClose
      end

      # DSL: token type that starts this element.
      def accepts(type)
        @accept_type = type
      end

      # DSL: build this element from one token with +block+.  The accepted
      # token type defaults to the element name.
      def parse_token(&block)
        raise 'already defined as parse sequence type' if !@parse_sequence_proc.nil?

        @parse_token_proc = block
        @accept_type ||= @name
        extend ParseToken
      end

      # DSL: build this element by letting +block+ consume the sequence.
      # The accepted token type defaults to the element name.
      def parse_sequence(&block)
        raise 'already defined as parse token type' if !@parse_token_proc.nil?

        @parse_sequence_proc = block
        @accept_type ||= @name
        extend ParseSequence
      end

      # Forwards a message to the logger, if one was given.
      def log(lineno, log)
        @logger.call(lineno, log) if !@logger.nil?
      end
    end

    # Node of the parse result: an element type, its child nodes and the
    # attributes produced by parse_token / parse_sequence blocks.
    class ParsedData
      attr_accessor :type, :children
      attr_reader :attributes

      def initialize(type, children = [])
        @children = children
        @type = type
        @attributes = {}
      end

      def is_leaf?
        @children.size == 0
      end

      # Stores +value+ (a hash) and defines a reader method on this node for
      # every key, so that { :data => "x" } makes node.data return "x".
      # Returns self to allow chaining.
      def set_attributes(value)
        @attributes.merge!(value)
        singleton = (class << self; self; end)
        value.each do |k, v|
          singleton.send(:define_method, k) { v }
        end
        self
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Noratext
  # Lexer for XML-like markup: tags look like <name ...> and </name>.
  class XmlyLexer < Lexer
    def initialize
      super
    end

    # Tag implementation used for every declared symbol.
    def tag_class
      XmlyTag
    end

    # Text that closes +tag+, e.g. "</code>" for :code.
    def closetag_for(tag)
      "</#{tag}>"
    end

    # Tag recognized with angle-bracket syntax.
    class XmlyTag < Lexer::Tag
      # Attribute parser installed on every XmlyTag: classifies the raw tag
      # text as :closetag when it starts with "</", otherwise :opentag.
      def self.default_attribute_parser
        lambda { |s| { :kind => (s =~ /\A<\// ? :closetag : :opentag) } }
      end

      def initialize(name)
        super
        @attribute_parsers << self.class.default_attribute_parser
      end

      # Returns the MatchData for the first occurrence of this tag in +s+,
      # or nil.  Closing tags are only recognized when the tag was not
      # declared with without_close.
      def matcher(s)
        slash = @with_close ? '/?' : ''
        # The lookahead requires the tag name to end right after the
        # pattern (whitespace, "/" or ">" must follow).  Without it a tag
        # named "p" also matched "<pre>" and "b" matched "<blockquote>".
        /<#{slash}#{@match_pattern}(?=[\s\/>]).*?>/.match(s)
      end
    end
  end
end
|
data/noratext.gemspec
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{noratext}
|
8
|
+
s.version = "0.0.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["KOJIMA Satoshi"]
|
12
|
+
s.date = %q{2010-07-13}
|
13
|
+
s.description = %q{noratext is a simple (and rather stupid) lexer and parser generator for loosly markuped text. }
|
14
|
+
s.email = %q{skoji@mac.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"lib/noratext.rb",
|
27
|
+
"lib/noratext/lexer.rb",
|
28
|
+
"lib/noratext/parser.rb",
|
29
|
+
"lib/noratext/parser_element.rb",
|
30
|
+
"lib/noratext/xmly_lexer.rb",
|
31
|
+
"noratext.gemspec",
|
32
|
+
"spec/noratext_lexer_spec.rb",
|
33
|
+
"spec/noratext_parser_spec.rb",
|
34
|
+
"spec/noratext_spec.rb",
|
35
|
+
"spec/spec.opts",
|
36
|
+
"spec/spec_helper.rb",
|
37
|
+
"spec/ydml_grammer_definition.rb"
|
38
|
+
]
|
39
|
+
s.homepage = %q{http://github.com/skoji/noratext}
|
40
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
41
|
+
s.require_paths = ["lib"]
|
42
|
+
s.rubygems_version = %q{1.3.7}
|
43
|
+
s.summary = %q{noratext: simple lexer/parser generator for markuped text}
|
44
|
+
s.test_files = [
|
45
|
+
"spec/noratext_lexer_spec.rb",
|
46
|
+
"spec/noratext_parser_spec.rb",
|
47
|
+
"spec/noratext_spec.rb",
|
48
|
+
"spec/spec_helper.rb",
|
49
|
+
"spec/ydml_grammer_definition.rb"
|
50
|
+
]
|
51
|
+
|
52
|
+
if s.respond_to? :specification_version then
|
53
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
54
|
+
s.specification_version = 3
|
55
|
+
|
56
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
57
|
+
s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
|
58
|
+
else
|
59
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
60
|
+
end
|
61
|
+
else
|
62
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
|
+
|
4
|
+
describe "Noratext::Lexer" do
|
5
|
+
before do
|
6
|
+
Noratext::Lexer.define :test do
|
7
|
+
symbols :center, :left, :right, :quote
|
8
|
+
rawtext_till_close :quote
|
9
|
+
|
10
|
+
symbol :paragraph do
|
11
|
+
match_pattern 'p'
|
12
|
+
without_close
|
13
|
+
end
|
14
|
+
|
15
|
+
symbol :image do
|
16
|
+
match_pattern 'img'
|
17
|
+
without_close
|
18
|
+
add_parser do
|
19
|
+
|s|
|
20
|
+
/src="(.*?)"/ =~ s
|
21
|
+
path = $1
|
22
|
+
/scale="(.*?)"/ =~ s
|
23
|
+
scale = $1
|
24
|
+
{ :path => path, :scale => scale }
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should parse tag correctly" do
|
31
|
+
lexer = Noratext::Lexer[:test]
|
32
|
+
opentag = lexer.factory('<center>')
|
33
|
+
closetag = lexer.factory('</center>')
|
34
|
+
|
35
|
+
opentag[:tag][:name].should == :center
|
36
|
+
opentag[:tag][:kind].should == :opentag
|
37
|
+
|
38
|
+
closetag[:tag][:name].should == :center
|
39
|
+
closetag[:tag][:kind].should == :closetag
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should create parse path" do
|
43
|
+
lexer = Noratext::Lexer[:test]
|
44
|
+
tag = lexer.factory('<img src="../img/path.jpg">')
|
45
|
+
tag[:tag][:name].should == :image
|
46
|
+
tag[:tag][:kind].should == :opentag
|
47
|
+
tag[:tag][:path].should == '../img/path.jpg'
|
48
|
+
tag[:tag][:scale].should be_nil
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should create parse path" do
|
52
|
+
lexer = Noratext::Lexer[:test]
|
53
|
+
tag = lexer.factory('<img src="../img/path.jpg" scale="90%">')
|
54
|
+
tag[:tag][:name].should == :image
|
55
|
+
tag[:tag][:kind].should == :opentag
|
56
|
+
tag[:tag][:path].should == '../img/path.jpg'
|
57
|
+
tag[:tag][:scale].should == '90%'
|
58
|
+
end
|
59
|
+
|
60
|
+
it "should parse rawtext tag" do
|
61
|
+
lexer = Noratext::Lexer[:test]
|
62
|
+
text = "<quote>この部分は、<center>とかはいっていても、そのまま見えるはず。
|
63
|
+
<bold>改行</bold>しても、扱えるはず。</quote>このへんは、タグを<center>読む。"
|
64
|
+
io = StringIO.new(text)
|
65
|
+
|
66
|
+
processed = lexer.process(io)
|
67
|
+
processed.size.should == 7
|
68
|
+
processed[0][:type].should == :quote
|
69
|
+
processed[1][:type].should == :text
|
70
|
+
processed[1][:data].should == "この部分は、<center>とかはいっていても、そのまま見えるはず。\n"
|
71
|
+
processed[2][:type].should == :text
|
72
|
+
processed[2][:data].should == '<bold>改行</bold>しても、扱えるはず。'
|
73
|
+
processed[3][:type].should == :quote
|
74
|
+
processed[4][:type].should == :text
|
75
|
+
processed[4][:data].should == 'このへんは、タグを'
|
76
|
+
processed[5][:type].should == :center
|
77
|
+
processed[6][:type].should == :text
|
78
|
+
processed[6][:data].should == '読む。'
|
79
|
+
end
|
80
|
+
|
81
|
+
|
82
|
+
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/ydml_grammer_definition')
|
4
|
+
|
5
|
+
describe "Noratext::Parser" do
|
6
|
+
|
7
|
+
it "should accept open-close" do
|
8
|
+
element = Noratext::Parser::Element.new(:center, nil, nil)
|
9
|
+
element.open_close
|
10
|
+
element.accept?({ :type => :center, :tag => { :name => :center, :kind => :opentag }}).should be_true
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should accept text" do
  element = Noratext::Parser::Element.new(:text, nil, nil)
  element.accepts :text
  # The result has to be asserted; without an expectation this example
  # passed no matter what accept? returned.
  element.accept?({ :type => :text }).should be_true
end
|
18
|
+
|
19
|
+
it "should accept open-close" do
|
20
|
+
elements = {}
|
21
|
+
elements[:text] = Noratext::Parser::Element.new(:text, nil, elements)
|
22
|
+
elements[:text].accepts :text
|
23
|
+
elements[:text].parse_sequence do
|
24
|
+
|s|
|
25
|
+
a = s.shift
|
26
|
+
{ :data => a[:data] }
|
27
|
+
end
|
28
|
+
element = Noratext::Parser::Element.new(:center, nil, elements)
|
29
|
+
element.open_close
|
30
|
+
element.contains :text
|
31
|
+
element.accept?({ :type => :center, :tag => { :name => :center, :kind => :opentag }}).should be_true
|
32
|
+
a = element.process([{ :type => :center, :tag => { :name => :center, :kind => :opentag }},
|
33
|
+
{ :type => :text, :data => "foobar" },
|
34
|
+
{ :type => :center, :tag => { :name => :center, :kind => :closetag}}])
|
35
|
+
a.type.should == :center
|
36
|
+
a.is_leaf?.should_not be_true
|
37
|
+
a.children.size.should == 1
|
38
|
+
a.children[0].type.should == :text
|
39
|
+
a.children[0].is_leaf?.should be_true
|
40
|
+
a.children[0].data.should == "foobar"
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
describe Noratext::Parser do
|
46
|
+
it "should parse valid test" do
|
47
|
+
seq = [ { :type => :text, :data=>'これが中身', :line => 1} ]
|
48
|
+
result = Noratext::Parser[:ydml].parse(seq)
|
49
|
+
result.type.should == :document
|
50
|
+
result.is_leaf?.should_not be_true
|
51
|
+
result.children.size.should == 1
|
52
|
+
result.children[0].is_leaf?.should_not be_true
|
53
|
+
result.children[0].type.should == :paragraph
|
54
|
+
result.children[0].children.size.should == 1
|
55
|
+
result.children[0].children[0].type.should == :text
|
56
|
+
result.children[0].children[0].data.should == 'これが中身'
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should parse nested data" do
|
60
|
+
seq = [ { :type => :center, :tag => { :name => :center, :kind => :opentag}},
|
61
|
+
{ :type => :text, :data=>'センタリング', :line => 1},
|
62
|
+
{ :type => :center, :tag => { :name => :center, :kind => :closetag}},
|
63
|
+
{ :type => :ruby, :tag => { :name => :ruby, :kind => :opentag }},
|
64
|
+
{ :type => :text, :data => '蜻蛉/とんぼ', :line => 2 },
|
65
|
+
{ :type => :ruby,:tag => { :name => :ruby, :kind => :closetag }}
|
66
|
+
]
|
67
|
+
|
68
|
+
parser = Noratext::Parser[:ydml]
|
69
|
+
result = parser.parse(seq)
|
70
|
+
result.type.should == :document
|
71
|
+
parser.log.size.should == 0
|
72
|
+
result.children.size.should == 2
|
73
|
+
result.children[0].is_leaf?.should_not be_true
|
74
|
+
result.children[0].type.should == :center
|
75
|
+
result.children[0].children.size.should == 1
|
76
|
+
result.children[0].children[0].type.should == :paragraph
|
77
|
+
result.children[0].children[0].children[0].data.should == 'センタリング'
|
78
|
+
result.children[1].type.should == :paragraph
|
79
|
+
result.children[1].children.size.should == 1
|
80
|
+
result.children[1].children[0].type.should == :ruby
|
81
|
+
result.children[1].children[0].ruby.should == 'とんぼ'
|
82
|
+
result.children[1].children[0].body.should == '蜻蛉'
|
83
|
+
|
84
|
+
end
|
85
|
+
|
86
|
+
end
|
87
|
+
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
Noratext::Parser.define :ydml do
|
2
|
+
element :document do
|
3
|
+
contains :block
|
4
|
+
end
|
5
|
+
|
6
|
+
element :block do
|
7
|
+
is_oneof :paragraph, :center, :left, :right, :quote, :hasen
|
8
|
+
end
|
9
|
+
|
10
|
+
element :paragraph do
|
11
|
+
contains :text, :large, :small, :bold, :image, :sonomama, :ruby
|
12
|
+
end
|
13
|
+
|
14
|
+
element :center do
|
15
|
+
open_close
|
16
|
+
contains :paragraph
|
17
|
+
end
|
18
|
+
|
19
|
+
element :left do
|
20
|
+
open_close
|
21
|
+
contains :paragraph
|
22
|
+
end
|
23
|
+
|
24
|
+
element :right do
|
25
|
+
open_close
|
26
|
+
contains :paragraph
|
27
|
+
end
|
28
|
+
|
29
|
+
element :quote do
|
30
|
+
open_close
|
31
|
+
contains :paragraph
|
32
|
+
end
|
33
|
+
|
34
|
+
element :hasen do
|
35
|
+
parse_token do
|
36
|
+
|token|
|
37
|
+
{}
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
element :text do
|
42
|
+
accepts :text
|
43
|
+
parse_sequence do
|
44
|
+
|sequence|
|
45
|
+
data = ""
|
46
|
+
while (sequence.size > 0 &&
|
47
|
+
sequence[0][:type] == :text)
|
48
|
+
data << sequence.shift[:data]
|
49
|
+
end
|
50
|
+
{ :data => data }
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
element :large do
|
55
|
+
open_close
|
56
|
+
contains :paragraph
|
57
|
+
end
|
58
|
+
|
59
|
+
element :small do
|
60
|
+
open_close
|
61
|
+
contains :paragraph
|
62
|
+
end
|
63
|
+
|
64
|
+
element :bold do
|
65
|
+
open_close
|
66
|
+
contains :paragraph
|
67
|
+
end
|
68
|
+
|
69
|
+
element :image do
|
70
|
+
parse_token do
|
71
|
+
|token|
|
72
|
+
{ :imagepath => token[:imagepath], :size => token[:size] }
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
element :sonomama do
|
77
|
+
open_close
|
78
|
+
contains :text
|
79
|
+
end
|
80
|
+
|
81
|
+
element :ruby do
|
82
|
+
parse_sequence do
|
83
|
+
|sequence|
|
84
|
+
sequence.shift
|
85
|
+
/(.+)\/(.+)/ =~ sequence[0][:data]
|
86
|
+
raise "#{sequence[0][:data]} is invalid inside ruby tag." if $1.nil?
|
87
|
+
sequence.shift
|
88
|
+
raise "rubytag is not closed" if !is_closetag(sequence[0])
|
89
|
+
sequence.shift
|
90
|
+
{ :body => $1, :ruby => $2 }
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
metadata
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: noratext
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 31
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 0.0.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- KOJIMA Satoshi
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-07-13 00:00:00 +09:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: rspec
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 13
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 2
|
33
|
+
- 9
|
34
|
+
version: 1.2.9
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
37
|
+
description: "noratext is a simple (and rather stupid) lexer and parser generator for loosly markuped text. "
|
38
|
+
email: skoji@mac.com
|
39
|
+
executables: []
|
40
|
+
|
41
|
+
extensions: []
|
42
|
+
|
43
|
+
extra_rdoc_files:
|
44
|
+
- LICENSE
|
45
|
+
- README.rdoc
|
46
|
+
files:
|
47
|
+
- .document
|
48
|
+
- .gitignore
|
49
|
+
- LICENSE
|
50
|
+
- README.rdoc
|
51
|
+
- Rakefile
|
52
|
+
- VERSION
|
53
|
+
- lib/noratext.rb
|
54
|
+
- lib/noratext/lexer.rb
|
55
|
+
- lib/noratext/parser.rb
|
56
|
+
- lib/noratext/parser_element.rb
|
57
|
+
- lib/noratext/xmly_lexer.rb
|
58
|
+
- noratext.gemspec
|
59
|
+
- spec/noratext_lexer_spec.rb
|
60
|
+
- spec/noratext_parser_spec.rb
|
61
|
+
- spec/noratext_spec.rb
|
62
|
+
- spec/spec.opts
|
63
|
+
- spec/spec_helper.rb
|
64
|
+
- spec/ydml_grammer_definition.rb
|
65
|
+
has_rdoc: true
|
66
|
+
homepage: http://github.com/skoji/noratext
|
67
|
+
licenses: []
|
68
|
+
|
69
|
+
post_install_message:
|
70
|
+
rdoc_options:
|
71
|
+
- --charset=UTF-8
|
72
|
+
require_paths:
|
73
|
+
- lib
|
74
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
hash: 3
|
80
|
+
segments:
|
81
|
+
- 0
|
82
|
+
version: "0"
|
83
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
+
none: false
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
hash: 3
|
89
|
+
segments:
|
90
|
+
- 0
|
91
|
+
version: "0"
|
92
|
+
requirements: []
|
93
|
+
|
94
|
+
rubyforge_project:
|
95
|
+
rubygems_version: 1.3.7
|
96
|
+
signing_key:
|
97
|
+
specification_version: 3
|
98
|
+
summary: "noratext: simple lexer/parser generator for markuped text"
|
99
|
+
test_files:
|
100
|
+
- spec/noratext_lexer_spec.rb
|
101
|
+
- spec/noratext_parser_spec.rb
|
102
|
+
- spec/noratext_spec.rb
|
103
|
+
- spec/spec_helper.rb
|
104
|
+
- spec/ydml_grammer_definition.rb
|