noratext 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +74 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/lib/noratext.rb +4 -0
- data/lib/noratext/lexer.rb +153 -0
- data/lib/noratext/parser.rb +43 -0
- data/lib/noratext/parser_element.rb +189 -0
- data/lib/noratext/xmly_lexer.rb +38 -0
- data/noratext.gemspec +65 -0
- data/spec/noratext_lexer_spec.rb +82 -0
- data/spec/noratext_parser_spec.rb +87 -0
- data/spec/noratext_spec.rb +5 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/ydml_grammer_definition.rb +95 -0
- metadata +104 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 KOJIMA Satoshi
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
= noratext
|
2
|
+
|
3
|
+
* http://github.com/skoji/noratext
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
noratext is a simple (and rather stupid) lexer and parser generator for loosely marked-up text.
|
8
|
+
current version supports xml-like, but not valid-xml (like old HTML) markup.
|
9
|
+
|
10
|
+
will support wiki-style markup soon.
|
11
|
+
|
12
|
+
== FEATURES
|
13
|
+
|
14
|
+
* Noratext::Lexer/Noratext::Parser provide functionality to parse marked-up text.
|
15
|
+
|
16
|
+
== PROBLEMS
|
17
|
+
|
18
|
+
* only provides xml-style tags support.
|
19
|
+
|
20
|
+
* need to write duplicated rules for Lexer and Parser. e.g., need to define the same tag for both Lexer and Parser.
|
21
|
+
|
22
|
+
== SYNOPSIS
|
23
|
+
|
24
|
+
=== lexer : style A
|
25
|
+
# define lexer :my_ml
|
26
|
+
Noratext::Lexer.define :my_ml, :xml_style do
|
27
|
+
symbols :chapter, :section, :strong, :center, :right, :'font-size', :blockquote, :code, :change_paragraph
|
28
|
+
|
29
|
+
without_close :change_paragraph
|
30
|
+
match_pattern :change_paragraph, 'p' # tag is <p> , not <change-paragraph>
|
31
|
+
rawtext_till_close :code
|
32
|
+
|
33
|
+
# add attribute parser
|
34
|
+
add_parser :'font-size' do
|
35
|
+
|s|
|
36
|
+
/size="(.*?)"/ =~ s
|
37
|
+
{ :size => $1 }
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# use the lexer
|
42
|
+
Noratext::Lexer[:my_ml].process(io)
|
43
|
+
|
44
|
+
=== lexer : style B
|
45
|
+
# define symbols
|
46
|
+
Noratext::Lexer.define :my_ml, :xml_style do
|
47
|
+
symbol :chapter
|
48
|
+
symbol :section
|
49
|
+
symbol :strong
|
50
|
+
symbol :center
|
51
|
+
symbol :right
|
52
|
+
symbol :blockquote
|
53
|
+
symbol :'font-size' do
|
54
|
+
add_parser do
|
55
|
+
|s|
|
56
|
+
/size="(.*?)"/ =~ s
|
57
|
+
{ :size => $1 }
|
58
|
+
end
|
59
|
+
end
|
60
|
+
symbol :code do
|
61
|
+
rawtext_till_close
|
62
|
+
end
|
63
|
+
symbol :change_paragraph do
|
64
|
+
without_close
|
65
|
+
match_pattern 'p'
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# use the lexer
|
70
|
+
Noratext::Lexer[:my_ml].process(io)
|
71
|
+
|
72
|
+
== Copyright
|
73
|
+
|
74
|
+
Copyright (c) 2010 KOJIMA Satoshi. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "noratext"
|
8
|
+
gem.summary = %Q{ noratext: simple lexer/parser generator for markuped text}
|
9
|
+
gem.description = %Q{noratext is a simple (and rather stupid) lexer and parser generator for loosly markuped text. }
|
10
|
+
gem.email = "skoji@mac.com"
|
11
|
+
gem.homepage = "http://github.com/skoji/noratext"
|
12
|
+
gem.authors = ["KOJIMA Satoshi"]
|
13
|
+
gem.add_development_dependency "rspec", ">= 1.2.9"
|
14
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
|
+
end
|
16
|
+
Jeweler::GemcutterTasks.new
|
17
|
+
rescue LoadError
|
18
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'spec/rake/spectask'
|
22
|
+
Spec::Rake::SpecTask.new(:spec) do |spec|
|
23
|
+
spec.libs << 'lib' << 'spec'
|
24
|
+
spec.spec_files = FileList['spec/**/*_spec.rb']
|
25
|
+
end
|
26
|
+
|
27
|
+
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
28
|
+
spec.libs << 'lib' << 'spec'
|
29
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
30
|
+
spec.rcov = true
|
31
|
+
end
|
32
|
+
|
33
|
+
task :spec => :check_dependencies
|
34
|
+
|
35
|
+
task :default => :spec
|
36
|
+
|
37
|
+
require 'rake/rdoctask'
|
38
|
+
Rake::RDocTask.new do |rdoc|
|
39
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
40
|
+
|
41
|
+
rdoc.rdoc_dir = 'rdoc'
|
42
|
+
rdoc.title = "noratext #{version}"
|
43
|
+
rdoc.rdoc_files.include('README*')
|
44
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
45
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.0
|
data/lib/noratext.rb
ADDED
@@ -0,0 +1,153 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Noratext
  # Line-oriented lexer that turns marked-up text into a flat list of tokens.
  #
  # Definitions are registered by name with Lexer.define and instantiated
  # with Lexer[]. Every token is a Hash with the keys :type, :data and :line;
  # tokens created from a tag additionally carry a :tag Hash holding :name
  # plus whatever the tag's attribute parsers returned.
  #
  # Concrete subclasses must provide #tag_class and #closetag_for, and the
  # tag class must implement #matcher(string), returning a MatchData or nil.
  class Lexer
    @instances_block = {}

    # Registers a lexer definition under +name+.
    #
    # name  - key used later with Lexer[].
    # style - markup style; only :xml_style is supported.
    # block - definition DSL, evaluated with a lexer instance as self.
    #
    # The block is evaluated once right away so that mistakes in the
    # definition surface at define time. Returns the value of the block.
    # Raises ArgumentError for an unknown style or a missing block.
    def self.define(name, style = :xml_style, &block)
      raise ArgumentError, 'a definition block is required' if block.nil?

      lexer_class =
        case style
        when :xml_style then XmlyLexer
        else raise ArgumentError, "unknown lexer style: #{style.inspect}"
        end

      lexer = lexer_class.new
      @instances_block[name] = { :lexer_class => lexer_class, :block => block }
      lexer.instance_eval(&block) # for check only
    end

    # Builds a fresh lexer from the definition registered under +name+.
    # Raises ArgumentError when no such definition exists.
    def self.[](name)
      definition = @instances_block[name]
      raise ArgumentError, "lexer #{name.inspect} is not defined" if definition.nil?

      lexer = definition[:lexer_class].new
      lexer.instance_eval(&definition[:block])
      lexer
    end

    def initialize
      @tags = {}
      @rawtext_tag = nil
      @rawtext_close_tag = nil
    end

    # Reads +io+ line by line (using #gets and #lineno) and returns the
    # tokens of all lines as one Array.
    def process(io)
      result = []
      while (line = io.gets)
        # concat appends in place; the list is not re-copied for every line.
        result.concat(read_line(line, io.lineno))
      end
      result
    end

    # Declares several tags at once, each with default settings.
    def symbols(*symbols)
      symbols.each { |symbol| @tags[symbol] = new_tag(symbol) }
    end

    # Declares one tag. The optional block is evaluated with the new tag as
    # self, so it can call without_close, match_pattern, add_parser and
    # rawtext_till_close without naming the tag.
    def symbol(symbol, &block)
      @tags[symbol] = new_tag(symbol)
      @tags[symbol].instance_eval(&block) if block
    end

    # Sets the text used to recognise +tag+ in the input.
    def match_pattern(tag, pattern)
      tag_for(tag).match_pattern pattern
    end

    # Marks the given tags as having no closing form.
    def without_close(*tags)
      tags.each { |tag| tag_for(tag).without_close }
    end

    # Adds an attribute parser to +tag+. The block receives the matched tag
    # text and must return a Hash that is merged into the token's :tag Hash.
    def add_parser(tag, &block)
      tag_for(tag).attribute_parsers << block
    end

    # Makes everything between +tag+ and its close tag plain text.
    # +closetag+ defaults to closetag_for(tag).
    def rawtext_till_close(tag, closetag = nil)
      closetag ||= closetag_for(tag)
      tag_for(tag).rawtext_till_close closetag
    end

    # Tokenizes one line (or the remainder of one) and returns its tokens.
    # Raw-text mode started on an earlier line is continued here.
    def read_line(s, line_no)
      return [] if s == ""
      return read_rawtext(s, line_no) if @rawtext_tag

      t = factory(s)
      return [{ :type => :text, :data => s, :line => line_no }] if t.nil?

      result = []
      result << { :type => :text, :data => t[:pre], :line => line_no } if t[:pre] != ""
      result << { :type => t[:tag][:name], :data => t[:data], :line => line_no, :tag => t[:tag] }
      result + read_line(t[:rest], line_no)
    end

    # Finds the leftmost tag in +s+.
    #
    # Returns nil when no tag matches, otherwise a Hash with :pre (text
    # before the tag), :rest (text after it), :data (the matched tag text)
    # and :tag (:name plus parsed attributes). When two tags match at the
    # same position the one visited first in @tags wins.
    #
    # Side effect: matching a raw-text tag switches the lexer into raw-text
    # mode, unless the attribute parsers flagged the match as a :closetag. A
    # stray closing tag must not swallow the text that follows it.
    def factory(s)
      best = nil
      @tags.each_value do |tag|
        m = tag.matcher(s)
        next if m.nil?
        best = { :tag => tag, :match => m } if best.nil? || m.begin(0) < best[:match].begin(0)
      end
      return nil if best.nil?

      m = best[:match]
      tag = best[:tag]
      attributes = tag.parse_attribute(m[0])

      closer = tag.rawtext_till_close_tag
      if closer && attributes[:kind] != :closetag
        @rawtext_tag = tag
        @rawtext_close_tag = closer
      end

      { :pre => m.pre_match,
        :rest => m.post_match,
        :data => m[0],
        :tag => { :name => tag.name }.merge(attributes) }
    end

    private

    # Creates a tag and gives it a way to ask this lexer for a default close
    # tag, so rawtext_till_close works without arguments inside a symbol
    # block.
    def new_tag(symbol)
      tag = tag_class.new(symbol)
      if tag.respond_to?(:closetag_builder=)
        tag.closetag_builder = lambda { |name| closetag_for(name) }
      end
      tag
    end

    # Looks up a declared tag, failing with a readable message otherwise.
    def tag_for(name)
      tag = @tags[name]
      raise ArgumentError, "tag #{name.inspect} is not declared" if tag.nil?
      tag
    end

    # Handles a line while in raw-text mode: everything up to the close tag
    # is plain text; after the close tag normal lexing resumes.
    def read_rawtext(s, line_no)
      matched = rawtext_close_pattern.match(s)
      return [{ :type => :text, :data => s, :line => line_no }] if matched.nil?

      closing = @rawtext_tag
      tokens = [
        { :type => :text, :data => matched.pre_match, :line => line_no },
        { :type => closing.name,
          :data => matched[0],
          :line => line_no,
          :tag => { :name => closing.name }.merge(closing.parse_attribute(matched[0])) }
      ]

      @rawtext_tag = nil
      @rawtext_close_tag = nil
      tokens + read_line(matched.post_match, line_no)
    end

    # The close tag is matched literally; a Regexp is used as given.
    def rawtext_close_pattern
      return @rawtext_close_tag if @rawtext_close_tag.is_a?(Regexp)
      /#{Regexp.escape(@rawtext_close_tag.to_s)}/
    end

    # Settings of a single tag known to the lexer. Subclasses add #matcher.
    class Tag
      attr_accessor :name, :attribute_parsers, :rawtext_till_close_tag
      # Callable that maps a tag name to its default close tag; assigned by
      # the owning lexer.
      attr_writer :closetag_builder

      def initialize(name)
        @name = name
        @match_pattern = name.to_s
        @attribute_parsers = []
        @with_close = true
        @rawtext_till_close_tag = nil
        @closetag_builder = nil
      end

      # Declares that this tag has no closing form.
      def without_close
        @with_close = false
      end

      # Treats everything up to +tag+ (default: the lexer's close tag for
      # this tag's name) as plain text.
      def rawtext_till_close(tag = nil)
        tag ||= closetag_for(@name)
        @rawtext_till_close_tag = tag
      end

      # Adds an attribute parser; see Lexer#add_parser.
      def add_parser(&block)
        @attribute_parsers << block
      end

      # Sets the text used to recognise this tag in the input.
      def match_pattern(pattern)
        @match_pattern = pattern
      end

      # Runs every attribute parser on the matched tag text +s+ and merges
      # the resulting Hashes; later parsers win on duplicate keys.
      def parse_attribute(s)
        @attribute_parsers.inject({}) { |result, parser| result.merge!(parser.call(s)) }
      end

      private

      # Default close tag for +name+, obtained from the owning lexer.
      def closetag_for(name)
        if @closetag_builder.nil?
          raise ArgumentError, "no default close tag for #{name.inspect}; pass one explicitly"
        end
        @closetag_builder.call(name)
      end
    end
  end
end
|
153
|
+
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Noratext
  # Turns the token list produced by a lexer into a tree of ParsedData.
  #
  # Grammars are registered by name with Parser.define and instantiated with
  # Parser[]. The first element declared in a grammar is the start element.
  class Parser
    @instances_block = {}

    # Registers a grammar under +name+.
    #
    # name  - key used later with Parser[].
    # style - accepted for symmetry with Lexer.define; not used here.
    # block - grammar DSL, evaluated with a parser instance as self.
    #
    # The block is evaluated once right away so that mistakes in the grammar
    # surface at define time. Raises ArgumentError when no block is given.
    def self.define(name, style = :xml_style, &block)
      raise ArgumentError, 'a definition block is required' if block.nil?

      parser = Parser.new
      @instances_block[name] = block
      parser.instance_eval(&block) # for check only
    end

    # Builds a fresh parser from the grammar registered under +name+.
    # Raises ArgumentError when no such grammar exists.
    def self.[](name)
      block = @instances_block[name]
      raise ArgumentError, "parser #{name.inspect} is not defined" if block.nil?

      parser = Parser.new
      parser.instance_eval(&block)
      parser
    end

    # Declares an element. The optional block is evaluated with the new
    # Element as self. The first declared element becomes the start element.
    def element(name, &block)
      element = Element.new(name, method(:logger), @elements)
      @start_element = element if @elements.size == 0
      @elements[name] = element
      element.instance_eval(&block) if block
    end

    def initialize
      @elements = {}
      @log = []
    end

    # Parses +sequence+ (an Array of lexer tokens, consumed destructively)
    # starting at the start element and returns what that element's
    # #process returns. If tokens are left over, the first one is reported
    # in the log as unexpected.
    def parse(sequence)
      raise 'no element is defined for this parser' if @start_element.nil?

      result = @start_element.process(sequence)
      logger sequence[0][:line], "unexpected #{sequence[0][:data]}" if sequence.size > 0
      result
    end

    # Records a message for line +lineno+; handed to every Element as its
    # logging callback.
    def logger(lineno, log)
      @log << { :lineno => lineno, :log => log }
    end

    # Returns the recorded messages as "<lineno>: <message>" strings, ordered
    # by line number. Entries without a line number sort first, and entries
    # on the same line keep the order in which they were recorded.
    def log
      ordered = @log.each_with_index.sort_by { |entry, index| [entry[:lineno].to_i, index] }
      ordered.map { |entry, _index| "#{entry[:lineno]}: #{entry[:log]}" }
    end
  end
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
module Noratext
  class Parser

    # Behaviour of an element declared with +contains+: it is made of zero
    # or more child elements taken from a fixed list.
    module Contains
      # Child Element objects, resolved from their names on first use.
      def element_to_parse
        @element_to_parse ||= @contains.map { |element_name| @elements[element_name] }
      end

      # Consumes tokens from +sequence+ for as long as a child element
      # accepts them and returns a ParsedData node holding the children.
      def process(sequence)
        preprocess(sequence)
        children = []
        while sequence.size > 0 && (elem = process_one_element(sequence))
          children << elem
        end
        postprocess(sequence)
        ParsedData.new(@name, children)
      end
    end

    # Behaviour of an element declared with +is_oneof+: it stands for
    # exactly one of several alternative elements.
    module IsOneof
      # Alternative Element objects, resolved from their names on first use.
      def element_to_parse
        @element_to_parse ||= @is_oneof.map { |element_name| @elements[element_name] }
      end

      # Returns the result of the first alternative accepting the next
      # token, or nil when the sequence is empty or nothing accepts it.
      def process(sequence)
        return nil if sequence.size == 0
        process_one_element(sequence) || nil
      end
    end

    # Behaviour of an element delimited by an opening and a closing tag.
    module OpenClose

      # Only this element's own opening tag starts it.
      def accept?(token)
        is_opentag(token)
      end

      # Consumes the opening tag. Its line number is kept on a stack so that
      # nested occurrences of the same element each report their own line.
      def preprocess(sequence)
        opening = sequence.shift
        @opentag = opening[:tag]
        (@opentag_lines ||= []).push(opening[:line])
      end

      def is_end_of_element(token)
        is_closetag(token)
      end

      # Consumes the closing tag if it is next. If some other token is next,
      # logs that the element was not closed, using the opening tag's line.
      # Nothing is logged when the input simply ends.
      def postprocess(sequence)
        opened_at = (@opentag_lines ||= []).pop
        return if sequence.size == 0

        if is_closetag(sequence[0])
          sequence.shift
          return
        end

        log opened_at, "#{@name} is not closed"
      end
    end

    # Behaviour of an element built from exactly one token.
    module ParseToken
      # Shifts one token, passes it to the parse_token block and stores the
      # Hash it returns as the node's attributes.
      def process(sequence)
        ParsedData.new(@name).set_attributes(@parse_token_proc.call(sequence.shift))
      end
    end

    # Behaviour of an element that consumes tokens itself.
    module ParseSequence
      # Passes the whole remaining sequence to the parse_sequence block,
      # which is expected to shift what it uses, and stores the Hash it
      # returns as the node's attributes.
      def process(sequence)
        ParsedData.new(@name).set_attributes(@parse_sequence_proc.call(sequence))
      end
    end

    # One grammar rule. The DSL methods (contains, is_oneof, open_close,
    # accepts, parse_token, parse_sequence) configure the rule by extending
    # the instance with the behaviour modules above.
    class Element
      # name     - element name; also the token :type matched by
      #            is_opentag / is_closetag.
      # logger   - callable taking (lineno, message), or nil to drop logs.
      # elements - Hash of all elements by name, used to resolve children.
      def initialize(name, logger, elements)
        @name = name
        @logger = logger
        @elements = elements
      end

      # True if this element can start at +token+: either the token type
      # equals the accepted type, or one of the child elements accepts it.
      def accept?(token)
        return token[:type] == @accept_type if @accept_type
        element_to_parse.any? { |element| element.accept?(token) }
      end

      def is_closetag(token)
        token[:type] == @name &&
          token[:tag][:kind] == :closetag
      end

      def is_opentag(token)
        token[:type] == @name &&
          token[:tag][:kind] == :opentag
      end

      # Hook run before children are parsed; no-op by default.
      def preprocess(sequence)
      end

      # Hook run after children are parsed; no-op by default.
      def postprocess(sequence)
      end

      def is_end_of_element(token)
        false
      end

      # Parses one child from the front of +sequence+. Returns the child's
      # result, or nil when no child element accepts the next token.
      def process_one_element(sequence)
        # NOTE(review): this loop looks at a top-level :kind key, while
        # is_opentag/is_closetag read the kind from token[:tag][:kind]; for
        # tokens shaped like the latter the loop body never runs. Left as
        # is: enabling it would make elements without their own close tag
        # discard the closing tag of an enclosing element.
        while sequence.size > 0 && sequence[0][:kind] == :closetag
          return nil if is_end_of_element(sequence[0])
          log sequence[0][:line], "no opentag for #{sequence[0][:type]}"
          sequence.shift
        end
        return nil if sequence.size == 0

        element_to_parse.each do |element|
          return element.process(sequence) if element.accept?(sequence[0])
        end
        nil
      end

      # Declares the child elements (by name) this element is made of.
      def contains(*array)
        raise 'already is_oneof are defined' if !@is_oneof.nil?
        @contains = array
        extend Contains
      end

      # Declares the alternative elements (by name) this element stands for.
      def is_oneof(*array)
        raise 'already contains are defined' if !@contains.nil?
        @is_oneof = array
        extend IsOneof
      end

      # Declares that the element is delimited by opening and closing tags.
      def open_close
        extend OpenClose
      end

      # Sets the token type that starts this element.
      def accepts(type)
        @accept_type = type
      end

      # Declares that the element is built from a single token by +block+.
      # The accepted token type defaults to the element name.
      def parse_token(&block)
        raise 'already defined as parse sequence type' if !@parse_sequence_proc.nil?
        @parse_token_proc = block
        @accept_type ||= @name
        extend ParseToken
      end

      # Declares that +block+ consumes tokens from the sequence itself.
      # The accepted token type defaults to the element name.
      def parse_sequence(&block)
        raise 'already defined as parse token type' if !@parse_token_proc.nil?
        @parse_sequence_proc = block
        @accept_type ||= @name
        extend ParseSequence
      end

      # Forwards a message to the logger callback, if there is one.
      def log(lineno, log)
        @logger.call(lineno, log) if !@logger.nil?
      end
    end

    # Node of the parse result: an element type, its child nodes and a Hash
    # of attributes that are also exposed as reader methods.
    class ParsedData
      attr_accessor :type, :children
      attr_reader :attributes

      def initialize(type, children = [])
        @children = children
        @type = type
        @attributes = {}
      end

      def is_leaf?
        @children.size == 0
      end

      # Merges +value+ (a Hash) into the attributes and defines a reader
      # method on this object for every key. Returns self.
      def set_attributes(value)
        @attributes.merge!(value)
        singleton = (class << self; self; end)
        value.each do |k, v|
          singleton.send(:define_method, k) { v }
        end
        self
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Noratext
  # Lexer for XML-like markup: tags look like <name ...> and </name>.
  class XmlyLexer < Lexer

    def initialize
      super
    end

    # Class used for the tags of this lexer.
    def tag_class
      XmlyTag
    end

    # Default closing tag text for the tag named +tag+.
    def closetag_for(tag)
      "</#{tag}>"
    end

    # Tag recognised by its XML-like opening and, unless declared
    # without_close, closing form.
    class XmlyTag < Lexer::Tag
      # Attribute parser installed on every XmlyTag: adds :kind, which is
      # :closetag when the matched text starts with "</" and :opentag
      # otherwise.
      def self.default_attribute_parser
        lambda { |s| { :kind => /^<\// =~ s ? :closetag : :opentag } }
      end

      def initialize(name)
        super
        @attribute_parsers << self.class.default_attribute_parser
      end

      # Returns the MatchData of the first occurrence of this tag in +s+, or
      # nil. The closing form is only recognised for tags that have one.
      #
      # The tag name must be followed by whitespace, "/" or ">", so a tag
      # matched as "p" is not triggered by "<pre>". The pattern is grouped
      # so that an alternation inside it stays confined to the name.
      def matcher(s)
        opener = @with_close ? '</?' : '<'
        /#{opener}(?:#{@match_pattern})(?=[\s\/>]).*?>/.match s
      end
    end

  end
end
|
data/noratext.gemspec
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{noratext}
|
8
|
+
s.version = "0.0.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["KOJIMA Satoshi"]
|
12
|
+
s.date = %q{2010-07-13}
|
13
|
+
s.description = %q{noratext is a simple (and rather stupid) lexer and parser generator for loosly markuped text. }
|
14
|
+
s.email = %q{skoji@mac.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"lib/noratext.rb",
|
27
|
+
"lib/noratext/lexer.rb",
|
28
|
+
"lib/noratext/parser.rb",
|
29
|
+
"lib/noratext/parser_element.rb",
|
30
|
+
"lib/noratext/xmly_lexer.rb",
|
31
|
+
"noratext.gemspec",
|
32
|
+
"spec/noratext_lexer_spec.rb",
|
33
|
+
"spec/noratext_parser_spec.rb",
|
34
|
+
"spec/noratext_spec.rb",
|
35
|
+
"spec/spec.opts",
|
36
|
+
"spec/spec_helper.rb",
|
37
|
+
"spec/ydml_grammer_definition.rb"
|
38
|
+
]
|
39
|
+
s.homepage = %q{http://github.com/skoji/noratext}
|
40
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
41
|
+
s.require_paths = ["lib"]
|
42
|
+
s.rubygems_version = %q{1.3.7}
|
43
|
+
s.summary = %q{noratext: simple lexer/parser generator for markuped text}
|
44
|
+
s.test_files = [
|
45
|
+
"spec/noratext_lexer_spec.rb",
|
46
|
+
"spec/noratext_parser_spec.rb",
|
47
|
+
"spec/noratext_spec.rb",
|
48
|
+
"spec/spec_helper.rb",
|
49
|
+
"spec/ydml_grammer_definition.rb"
|
50
|
+
]
|
51
|
+
|
52
|
+
if s.respond_to? :specification_version then
|
53
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
54
|
+
s.specification_version = 3
|
55
|
+
|
56
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
57
|
+
s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
|
58
|
+
else
|
59
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
60
|
+
end
|
61
|
+
else
|
62
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
|
+
|
4
|
+
describe "Noratext::Lexer" do
|
5
|
+
before do
|
6
|
+
Noratext::Lexer.define :test do
|
7
|
+
symbols :center, :left, :right, :quote
|
8
|
+
rawtext_till_close :quote
|
9
|
+
|
10
|
+
symbol :paragraph do
|
11
|
+
match_pattern 'p'
|
12
|
+
without_close
|
13
|
+
end
|
14
|
+
|
15
|
+
symbol :image do
|
16
|
+
match_pattern 'img'
|
17
|
+
without_close
|
18
|
+
add_parser do
|
19
|
+
|s|
|
20
|
+
/src="(.*?)"/ =~ s
|
21
|
+
path = $1
|
22
|
+
/scale="(.*?)"/ =~ s
|
23
|
+
scale = $1
|
24
|
+
{ :path => path, :scale => scale }
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should parse tag correctly" do
|
31
|
+
lexer = Noratext::Lexer[:test]
|
32
|
+
opentag = lexer.factory('<center>')
|
33
|
+
closetag = lexer.factory('</center>')
|
34
|
+
|
35
|
+
opentag[:tag][:name].should == :center
|
36
|
+
opentag[:tag][:kind].should == :opentag
|
37
|
+
|
38
|
+
closetag[:tag][:name].should == :center
|
39
|
+
closetag[:tag][:kind].should == :closetag
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should create parse path" do
|
43
|
+
lexer = Noratext::Lexer[:test]
|
44
|
+
tag = lexer.factory('<img src="../img/path.jpg">')
|
45
|
+
tag[:tag][:name].should == :image
|
46
|
+
tag[:tag][:kind].should == :opentag
|
47
|
+
tag[:tag][:path].should == '../img/path.jpg'
|
48
|
+
tag[:tag][:scale].should be_nil
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should create parse path" do
|
52
|
+
lexer = Noratext::Lexer[:test]
|
53
|
+
tag = lexer.factory('<img src="../img/path.jpg" scale="90%">')
|
54
|
+
tag[:tag][:name].should == :image
|
55
|
+
tag[:tag][:kind].should == :opentag
|
56
|
+
tag[:tag][:path].should == '../img/path.jpg'
|
57
|
+
tag[:tag][:scale].should == '90%'
|
58
|
+
end
|
59
|
+
|
60
|
+
it "should parse rawtext tag" do
|
61
|
+
lexer = Noratext::Lexer[:test]
|
62
|
+
text = "<quote>この部分は、<center>とかはいっていても、そのまま見えるはず。
|
63
|
+
<bold>改行</bold>しても、扱えるはず。</quote>このへんは、タグを<center>読む。"
|
64
|
+
io = StringIO.new(text)
|
65
|
+
|
66
|
+
processed = lexer.process(io)
|
67
|
+
processed.size.should == 7
|
68
|
+
processed[0][:type].should == :quote
|
69
|
+
processed[1][:type].should == :text
|
70
|
+
processed[1][:data].should == "この部分は、<center>とかはいっていても、そのまま見えるはず。\n"
|
71
|
+
processed[2][:type].should == :text
|
72
|
+
processed[2][:data].should == '<bold>改行</bold>しても、扱えるはず。'
|
73
|
+
processed[3][:type].should == :quote
|
74
|
+
processed[4][:type].should == :text
|
75
|
+
processed[4][:data].should == 'このへんは、タグを'
|
76
|
+
processed[5][:type].should == :center
|
77
|
+
processed[6][:type].should == :text
|
78
|
+
processed[6][:data].should == '読む。'
|
79
|
+
end
|
80
|
+
|
81
|
+
|
82
|
+
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + '/ydml_grammer_definition')
|
4
|
+
|
5
|
+
describe "Noratext::Parser" do
|
6
|
+
|
7
|
+
it "should accept open-close" do
|
8
|
+
element = Noratext::Parser::Element.new(:center, nil, nil)
|
9
|
+
element.open_close
|
10
|
+
element.accept?({ :type => :center, :tag => { :name => :center, :kind => :opentag }}).should be_true
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should accept text" do
  element = Noratext::Parser::Element.new(:text, nil, nil)
  element.accepts :text
  # Without an expectation this example could never fail.
  element.accept?({ :type => :text }).should be_true
end
|
18
|
+
|
19
|
+
it "should accept open-close" do
|
20
|
+
elements = {}
|
21
|
+
elements[:text] = Noratext::Parser::Element.new(:text, nil, elements)
|
22
|
+
elements[:text].accepts :text
|
23
|
+
elements[:text].parse_sequence do
|
24
|
+
|s|
|
25
|
+
a = s.shift
|
26
|
+
{ :data => a[:data] }
|
27
|
+
end
|
28
|
+
element = Noratext::Parser::Element.new(:center, nil, elements)
|
29
|
+
element.open_close
|
30
|
+
element.contains :text
|
31
|
+
element.accept?({ :type => :center, :tag => { :name => :center, :kind => :opentag }}).should be_true
|
32
|
+
a = element.process([{ :type => :center, :tag => { :name => :center, :kind => :opentag }},
|
33
|
+
{ :type => :text, :data => "foobar" },
|
34
|
+
{ :type => :center, :tag => { :name => :center, :kind => :closetag}}])
|
35
|
+
a.type.should == :center
|
36
|
+
a.is_leaf?.should_not be_true
|
37
|
+
a.children.size.should == 1
|
38
|
+
a.children[0].type.should == :text
|
39
|
+
a.children[0].is_leaf?.should be_true
|
40
|
+
a.children[0].data.should == "foobar"
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
describe Noratext::Parser do
|
46
|
+
it "should parse valid test" do
|
47
|
+
seq = [ { :type => :text, :data=>'これが中身', :line => 1} ]
|
48
|
+
result = Noratext::Parser[:ydml].parse(seq)
|
49
|
+
result.type.should == :document
|
50
|
+
result.is_leaf?.should_not be_true
|
51
|
+
result.children.size.should == 1
|
52
|
+
result.children[0].is_leaf?.should_not be_true
|
53
|
+
result.children[0].type.should == :paragraph
|
54
|
+
result.children[0].children.size.should == 1
|
55
|
+
result.children[0].children[0].type.should == :text
|
56
|
+
result.children[0].children[0].data.should == 'これが中身'
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should parse nested data" do
|
60
|
+
seq = [ { :type => :center, :tag => { :name => :center, :kind => :opentag}},
|
61
|
+
{ :type => :text, :data=>'センタリング', :line => 1},
|
62
|
+
{ :type => :center, :tag => { :name => :center, :kind => :closetag}},
|
63
|
+
{ :type => :ruby, :tag => { :name => :ruby, :kind => :opentag }},
|
64
|
+
{ :type => :text, :data => '蜻蛉/とんぼ', :line => 2 },
|
65
|
+
{ :type => :ruby,:tag => { :name => :ruby, :kind => :closetag }}
|
66
|
+
]
|
67
|
+
|
68
|
+
parser = Noratext::Parser[:ydml]
|
69
|
+
result = parser.parse(seq)
|
70
|
+
result.type.should == :document
|
71
|
+
parser.log.size.should == 0
|
72
|
+
result.children.size.should == 2
|
73
|
+
result.children[0].is_leaf?.should_not be_true
|
74
|
+
result.children[0].type.should == :center
|
75
|
+
result.children[0].children.size.should == 1
|
76
|
+
result.children[0].children[0].type.should == :paragraph
|
77
|
+
result.children[0].children[0].children[0].data.should == 'センタリング'
|
78
|
+
result.children[1].type.should == :paragraph
|
79
|
+
result.children[1].children.size.should == 1
|
80
|
+
result.children[1].children[0].type.should == :ruby
|
81
|
+
result.children[1].children[0].ruby.should == 'とんぼ'
|
82
|
+
result.children[1].children[0].body.should == '蜻蛉'
|
83
|
+
|
84
|
+
end
|
85
|
+
|
86
|
+
end
|
87
|
+
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
Noratext::Parser.define :ydml do
|
2
|
+
element :document do
|
3
|
+
contains :block
|
4
|
+
end
|
5
|
+
|
6
|
+
element :block do
|
7
|
+
is_oneof :paragraph, :center, :left, :right, :quote, :hasen
|
8
|
+
end
|
9
|
+
|
10
|
+
element :paragraph do
|
11
|
+
contains :text, :large, :small, :bold, :image, :sonomama, :ruby
|
12
|
+
end
|
13
|
+
|
14
|
+
element :center do
|
15
|
+
open_close
|
16
|
+
contains :paragraph
|
17
|
+
end
|
18
|
+
|
19
|
+
element :left do
|
20
|
+
open_close
|
21
|
+
contains :paragraph
|
22
|
+
end
|
23
|
+
|
24
|
+
element :right do
|
25
|
+
open_close
|
26
|
+
contains :paragraph
|
27
|
+
end
|
28
|
+
|
29
|
+
element :quote do
|
30
|
+
open_close
|
31
|
+
contains :paragraph
|
32
|
+
end
|
33
|
+
|
34
|
+
element :hasen do
|
35
|
+
parse_token do
|
36
|
+
|token|
|
37
|
+
{}
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
element :text do
|
42
|
+
accepts :text
|
43
|
+
parse_sequence do
|
44
|
+
|sequence|
|
45
|
+
data = ""
|
46
|
+
while (sequence.size > 0 &&
|
47
|
+
sequence[0][:type] == :text)
|
48
|
+
data << sequence.shift[:data]
|
49
|
+
end
|
50
|
+
{ :data => data }
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
element :large do
|
55
|
+
open_close
|
56
|
+
contains :paragraph
|
57
|
+
end
|
58
|
+
|
59
|
+
element :small do
|
60
|
+
open_close
|
61
|
+
contains :paragraph
|
62
|
+
end
|
63
|
+
|
64
|
+
element :bold do
|
65
|
+
open_close
|
66
|
+
contains :paragraph
|
67
|
+
end
|
68
|
+
|
69
|
+
element :image do
|
70
|
+
parse_token do
|
71
|
+
|token|
|
72
|
+
{ :imagepath => token[:imagepath], :size => token[:size] }
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
element :sonomama do
|
77
|
+
open_close
|
78
|
+
contains :text
|
79
|
+
end
|
80
|
+
|
81
|
+
element :ruby do
|
82
|
+
parse_sequence do
|
83
|
+
|sequence|
|
84
|
+
sequence.shift
|
85
|
+
/(.+)\/(.+)/ =~ sequence[0][:data]
|
86
|
+
raise "#{sequence[0][:data]} is invalid inside ruby tag." if $1.nil?
|
87
|
+
sequence.shift
|
88
|
+
raise "rubytag is not closed" if !is_closetag(sequence[0])
|
89
|
+
sequence.shift
|
90
|
+
{ :body => $1, :ruby => $2 }
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
metadata
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: noratext
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 31
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 0.0.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- KOJIMA Satoshi
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-07-13 00:00:00 +09:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: rspec
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 13
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 2
|
33
|
+
- 9
|
34
|
+
version: 1.2.9
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
37
|
+
description: "noratext is a simple (and rather stupid) lexer and parser generator for loosly markuped text. "
|
38
|
+
email: skoji@mac.com
|
39
|
+
executables: []
|
40
|
+
|
41
|
+
extensions: []
|
42
|
+
|
43
|
+
extra_rdoc_files:
|
44
|
+
- LICENSE
|
45
|
+
- README.rdoc
|
46
|
+
files:
|
47
|
+
- .document
|
48
|
+
- .gitignore
|
49
|
+
- LICENSE
|
50
|
+
- README.rdoc
|
51
|
+
- Rakefile
|
52
|
+
- VERSION
|
53
|
+
- lib/noratext.rb
|
54
|
+
- lib/noratext/lexer.rb
|
55
|
+
- lib/noratext/parser.rb
|
56
|
+
- lib/noratext/parser_element.rb
|
57
|
+
- lib/noratext/xmly_lexer.rb
|
58
|
+
- noratext.gemspec
|
59
|
+
- spec/noratext_lexer_spec.rb
|
60
|
+
- spec/noratext_parser_spec.rb
|
61
|
+
- spec/noratext_spec.rb
|
62
|
+
- spec/spec.opts
|
63
|
+
- spec/spec_helper.rb
|
64
|
+
- spec/ydml_grammer_definition.rb
|
65
|
+
has_rdoc: true
|
66
|
+
homepage: http://github.com/skoji/noratext
|
67
|
+
licenses: []
|
68
|
+
|
69
|
+
post_install_message:
|
70
|
+
rdoc_options:
|
71
|
+
- --charset=UTF-8
|
72
|
+
require_paths:
|
73
|
+
- lib
|
74
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
hash: 3
|
80
|
+
segments:
|
81
|
+
- 0
|
82
|
+
version: "0"
|
83
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
+
none: false
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
hash: 3
|
89
|
+
segments:
|
90
|
+
- 0
|
91
|
+
version: "0"
|
92
|
+
requirements: []
|
93
|
+
|
94
|
+
rubyforge_project:
|
95
|
+
rubygems_version: 1.3.7
|
96
|
+
signing_key:
|
97
|
+
specification_version: 3
|
98
|
+
summary: "noratext: simple lexer/parser generator for markuped text"
|
99
|
+
test_files:
|
100
|
+
- spec/noratext_lexer_spec.rb
|
101
|
+
- spec/noratext_parser_spec.rb
|
102
|
+
- spec/noratext_spec.rb
|
103
|
+
- spec/spec_helper.rb
|
104
|
+
- spec/ydml_grammer_definition.rb
|