pseudohikiparser 0.0.0.4.develop

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,155 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'htmlelement'
4
+ require 'pseudohiki/inlineparser'
5
+ require 'pseudohiki/htmlformat'
6
+ #require('hikiparser/hikiblockparser')
7
+
8
+ module PseudoHiki
9
# Reopens HtmlFormat to give the formatter object stored under
# Formatter[PluginNode] its own singleton #visit, so that plugin nodes are
# rendered through HtmlPlugin.
class HtmlFormat
  class << Formatter[PluginNode]
    # Joins the contents of +leaf+ into the plugin source text and returns
    # whatever HtmlPlugin#apply produces for it. The formatter's
    # @element_name is passed along as the tag type.
    def visit(leaf)
      plugin_source = leaf.join
      plugin = HtmlPlugin.new(@element_name, plugin_source)
      plugin.apply
    end
  end
end
16
+
17
# Turns the text of an inline plugin into an HTML fragment.
#
# The text is split into a plugin name and its data (see #parse), and
# #apply then calls the public instance method of that name. Names that do
# not correspond to a plugin method are rendered as a generic element of
# the given tag type with class="plugin".
class HtmlPlugin

  PLUGIN_PAT = /^(\w+)([\s\(]+)/
  NUMBER_RE = /(\d+)/

  # Public instance methods that drive plugin handling themselves and must
  # therefore never be selected by a plugin name.
  NON_PLUGIN_METHODS = %w(apply parse).freeze

  # [chemical formula, English name] pairs for which a plugin method is
  # generated below; the method name is the downcased formula.
  CHEMICAL_FORMULAE = [
    ["SF6", "sulfur hexafluoride"],
    ["CO2", "carbon dioxide"],
    ["HFC", "hydrofluorocarbon"],
    ["PFC", "perfluorocarbon"],
    ["CFC", "chlorofluorocarbon"],
    ["CH4", "methane"],
    ["H2O", "water"],
    ["C2F5Cl", "CFC-115"]
  ].freeze

  # Splits +data+ into the plugin name (stored in @plugin_name) and the
  # remaining text, which is returned.
  #
  # "name(args)" and "name args" are both accepted. When the separator
  # after the name is an opening parenthesis (surrounding whitespace is
  # ignored), @with_paren is set and one trailing ")" is removed from the
  # returned text. When no separator follows the name, the whole text is
  # taken as the name and "" is returned.
  def parse(data)
    text = data.chomp
    m = PLUGIN_PAT.match(text)
    unless m
      @plugin_name = text
      return ""
    end

    @plugin_name = m[1]
    # The separator group is greedy and may swallow newlines or spaces
    # around the parenthesis, hence strip rather than chomp.
    @with_paren = true if m[2].strip == "("
    result = text.sub(PLUGIN_PAT, "")
    # Drop only a real closing parenthesis, never an arbitrary last char.
    result = result.sub(/\)\s*\z/, "") if @with_paren
    result
  end

  # tag_type::    tag name used for the generic fallback element
  # parsed_data:: plugin source text; converted with #to_s before parsing
  def initialize(tag_type,parsed_data)
    @tag_type = tag_type
    @plugin_name = nil
    @with_paren = nil
    @data = parse(parsed_data.to_s)
  end

  # Runs the plugin named in the source text and returns its result.
  #
  # The name comes from document text, so it is only dispatched when
  # #plugin_method? accepts it; anything else (unknown names, names of
  # Object/Kernel methods such as "exit", or "apply"/"parse" themselves)
  # yields the generic fallback element instead of being sent.
  def apply
    return send(@plugin_name) if plugin_method?(@plugin_name)
    default_element
  end

  # Returns the plugin data passed through HtmlElement.decode, as a String.
  # NOTE(review): the data is emitted without any further escaping here;
  # confirm that callers only feed trusted documents to this plugin.
  def html
    # "<div class='raw-html'>"+HtmlElement.decode(@data)+"</div>"
    HtmlElement.decode(@data).to_s
  end

  # def inline
  #   lines = HtmlElement.decode(@data).split(/\r*\n/o)
  #   lines.shift if lines.first == ""
  #   HikiBlockParser.new.parse_lines(lines).join
  # end

  # Builds an <a> element from "name, mark": the element gets
  # name="name" and href="#name", and its content is the mark ("_" when
  # the mark is missing or empty). Empty data is treated as an empty name.
  def anchor
    name, anchor_mark = @data.split(/,\s*/o,2)
    name = name.to_s
    anchor_mark = "_" if anchor_mark.nil? || anchor_mark.empty?
    HtmlElement.create("a", anchor_mark,
                       "name" => name,
                       "href" => "#"+name)
  end

  # Defines a plugin method named after the downcased +chemical_formula+.
  #
  # The generated method reads up to two display modes from the plugin
  # data, separated by ", ":
  #   ":cf" (default) -> the formula with every number wrapped in <sub>
  #   ":en"           -> +en_word+
  # With two modes the result is "first(second)". An unrecognised single
  # mode yields nil.
  def HtmlPlugin.add_chemical_formula(chemical_formula="CO2",en_word="carbon dioxide")
    formula_html = chemical_formula.gsub(NUMBER_RE, "<sub>\\1</sub>")
    render = lambda do |mode|
      case mode
      when ":cf" then formula_html.dup
      when ":en" then "#{en_word}"
      end
    end

    # Defined on HtmlPlugin itself, which is where the former eval-based
    # definition placed the method regardless of the receiver.
    HtmlPlugin.send(:define_method, chemical_formula.downcase) do
      display, second_display = @data.split(", ")
      display ||= ":cf"
      if second_display
        [render.call(display), "(", render.call(second_display), ")"].join("")
      else
        render.call(display)
      end
    end
  end

  CHEMICAL_FORMULAE.each do |chemical_formula, en|
    add_chemical_formula chemical_formula, en
  end

  # Appends a superscript 2 to the data.
  def sq
    # I'm wondering if we'd be better to use &sup2; , but when we search by "km2" for example, we may have problem...
    "#{@data}<sup>2</sup>"
  end

  # Appends a superscript 3 to the data.
  def cb
    # I'm wondering if we'd be better to use &sup3; , but...
    "#{@data}<sup>3</sup>"
  end

  # Appends a superscript -1 to the data.
  def per
    "#{@data}<sup>-1</sup>"
  end

  # Returns the degree-Celsius sign as an HTML entity followed by "C".
  def c_degree
    "&deg;C"
  end

  # Wraps every run of digits in the data in <sub>...</sub>.
  def chemical_formula
    @data.gsub(NUMBER_RE, "<sub>\\1</sub>")
  end

  # Renders data of the form "<digits><rest>" as the digits in <sup>
  # followed by the rest. When the rest is itself the name of a plugin
  # method (e.g. "co2") that plugin's output is used for it. Data that
  # does not start with digits followed by a non-digit is returned as is.
  def iso
    m = /\A(\d+)([^\d].*)/.match(@data)
    return @data unless m

    weight, molecule = m[1], m[2]
    superscript = "<sup>#{weight}</sup>"
    return superscript + HtmlPlugin.new("",molecule).apply if plugin_method?(molecule)
    superscript + molecule
  end

  alias oc c_degree

  private

  # True when +name+ is a public instance method of this class that is
  # neither inherited from Object/Kernel nor listed in NON_PLUGIN_METHODS.
  def plugin_method?(name)
    return false if name.nil? || name.empty?
    return false if NON_PLUGIN_METHODS.include?(name)
    return false if Object.method_defined?(name) || Object.private_method_defined?(name)
    self.class.public_method_defined?(name)
  end

  # Generic rendering used for names that are not plugins: an element of
  # the tag type given to the constructor, holding the data, with
  # class="plugin".
  def default_element
    HtmlElement.create(@tag_type, @data, "class" => "plugin")
  end
end
137
+ end
138
+
139
# Demo run when this file is executed directly: prints the result of
# applying a handful of sample plugins. This code sits outside
# `module PseudoHiki`, so the class has to be referenced by its
# fully-qualified name.
if $0 == __FILE__
  p PseudoHiki::HtmlPlugin.new("div","html(
<ul>
<li>list
<li>list
</ul>)").apply
  p PseudoHiki::HtmlPlugin.new("div","inline(
*list
*list
)").apply

  p PseudoHiki::HtmlPlugin.new("div","co2").apply
  p PseudoHiki::HtmlPlugin.new("div","co2 :en").apply
  p PseudoHiki::HtmlPlugin.new("div","cb(3km)").apply
  p PseudoHiki::HtmlPlugin.new("div","per m").apply
  p PseudoHiki::HtmlPlugin.new("div","iso 18co2").apply
end
@@ -0,0 +1,169 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pseudohiki/treestack'
4
+ require 'htmlelement'
5
+
6
+ module PseudoHiki
7
# Start of a line that begins with "http:", "https:", "file:" or "ftp:",
# or with "/" optionally preceded by a single ".".
PROTOCOL      = /^((https?|file|ftp):|\.?\/)/
# Start of a line that begins with ".".
RELATIVE_PATH = /^\./o
# Start of a line that begins with "/", with two backslashes, or with a
# drive letter followed by ":\".
ROOT_PATH     = /^(\/|\\\\|[A-Za-z]:\\)/o
# Prefix of a file URI.
FILE_MARK     = "file:///"
# Case-insensitive match for a jpg/jpeg/gif/png/bmp extension at the end
# of a line.
ImageSuffix   = /\.(jpg|jpeg|gif|png|bmp)$/io
12
+
13
# Builds a single Regexp that matches any one of the given tokens.
#
# The token sets are flattened and de-duplicated, then ordered from the
# longest token to the shortest (ties broken by descending string order)
# so that a longer token is tried before a shorter one it starts with.
# Each token is escaped, so tokens are matched literally.
def self.compile_token_pat(*token_sets)
  unique_tokens = token_sets.flatten.uniq
  longest_first = unique_tokens.sort_by {|token| [token.length, token] }.reverse
  alternatives = longest_first.map {|token| Regexp.escape(token) }
  Regexp.new(alternatives.join("|"))
end
19
+
20
# Parser for inline markup, built on TreeStack.
#
# The input string is cut into tokens with a Regexp compiled from the
# opening/closing marks in HEAD/TAIL plus the separators; #parse then
# consumes the tokens, opening a node for a head mark, closing the matching
# node for a tail mark, and pushing everything else as an InlineLeaf.
class InlineParser < TreeStack
  module InlineElement
    class InlineNode < InlineParser::Node; end
    class InlineLeaf < InlineParser::Leaf; end
    # class LinkSepLeaf < InlineLeaf; end

    class LinkNode < InlineNode; end
    class EmNode < InlineNode; end
    class StrongNode < InlineNode; end
    class DelNode < InlineNode; end
    class PlainNode < InlineNode; end
    class PluginNode < InlineNode; end

    LinkSep, TableSep, DescSep = %w(| || :)
  end
  include InlineElement

  HEAD = {}            # opening mark => node class
  TAIL = {}            # closing mark => node class
  NodeTypeToHead = {}  # node class   => opening mark
  TokenPat = {}        # parser class => token Regexp

  [
    [LinkNode,   "[[",  "]]"],
    [EmNode,     "''",  "''"],
    [StrongNode, "'''", "'''"],
    [DelNode,    "==",  "=="],
    [PluginNode, "{{",  "}}"]
  ].each do |node_class, opening_mark, closing_mark|
    HEAD[opening_mark] = node_class
    TAIL[closing_mark] = node_class
    NodeTypeToHead[node_class] = opening_mark
  end

  TokenPat[self] = PseudoHiki.compile_token_pat(HEAD.keys,TAIL.keys,[LinkSep, TableSep, DescSep])

  # The token Regexp registered for the class of this parser.
  def token_pat
    TokenPat[self.class]
  end

  # Tokenizes +str+ right away and then sets up the underlying stack.
  def initialize(str)
    @tokens = split_into_tokens(str)
    super()
  end

  # Takes the current node out of the tree again and re-adds what it held
  # as plain content: first a leaf holding the node's opening mark, then
  # each former child.
  def convert_last_node_into_leaf
    demoted_node = remove_current_node
    opening_mark = NodeTypeToHead[demoted_node.class]
    push InlineLeaf.create(opening_mark)
    demoted_node.each {|child| push_as_leaf child }
  end

  # True when some node on the stack is exactly of +node_class+.
  def node_in_ancestors?(node_class)
    @stack.any? {|node| node_class == node.class }
  end

  # Tries to use +token+ as a closing mark.
  #
  # Pops the current node when it is of the class the token closes. When
  # only an ancestor is of that class, the nodes opened in between are
  # converted back into leaves first and the ancestor is then popped.
  # Returns nil when no open node corresponds to the token.
  def treated_as_node_end(token)
    closed_class = TAIL[token]
    return pop if current_node.class == closed_class
    return nil unless node_in_ancestors?(closed_class)

    convert_last_node_into_leaf until current_node.class == closed_class
    pop
  end

  # Cuts +str+ into an array of marks and the text between them; empty
  # strings are left out.
  def split_into_tokens(str)
    tokens = []
    rest = str
    while (m = token_pat.match(rest))
      tokens << m.pre_match << m[0]
      rest = m.post_match
    end
    tokens << rest unless rest.empty?
    tokens.reject {|token| token.empty? }
  end

  # Consumes all tokens and builds the tree; returns the parser itself.
  def parse
    while (token = @tokens.shift)
      next if TAIL[token] && treated_as_node_end(token)
      next if HEAD[token] && push(HEAD[token].new)
      push InlineLeaf.create(token)
    end
    self
  end

  # Parses +str+ and returns the resulting tree.
  def self.parse(str)
    new(str).parse.tree
  end
end
110
+
111
# InlineParser variant for one table row: the row is split into
# TableCellNode children at every TableSep, and modifiers at the start of
# a cell's text set the cell type and its row/column span.
class TableRowParser < InlineParser
  module InlineElement
    class TableCellNode < InlineParser::InlineElement::InlineNode
      attr_accessor :cell_type, :rowspan, :colspan
    end
  end
  include InlineElement

  # Note that TAIL and TokenPat are the hashes defined in InlineParser,
  # so these entries are added to the shared tables.
  TAIL[TableSep] = TableCellNode
  TokenPat[self] = InlineParser::TokenPat[InlineParser]

  TD, TH, ROW_EXPANDER, COL_EXPANDER, TH_PAT = %w(td th ^ > !)
  MODIFIED_CELL_PAT = /^!?[>^]*/o

  class InlineElement::TableCellNode
    # Initializes the cell as a plain "td" with rowspan and colspan 1 and,
    # when +token+ starts with a String, reads the cell modifiers from the
    # beginning of that string: a leading "!" makes the cell a "th", each
    # "^" adds one to rowspan and each ">" adds one to colspan.
    #
    # Returns a copy of the token with the modifiers removed from its
    # first string. A token that does not start with a String (for
    # example a node that has just been opened and is still empty) is
    # returned as the very same object: the parser keeps pushing children
    # into that object, so storing a copy in the cell would lose them.
    def parse_first_token(token)
      @cell_type, @rowspan, @colspan = TD, 1, 1
      token_str = token[0]
      return token unless token_str.kind_of?(String)

      parsed_token = token.dup
      m = MODIFIED_CELL_PAT.match(token_str)

      if m
        cell_modifiers = m[0].split(//o)
        if cell_modifiers.first == TH_PAT
          cell_modifiers.shift
          @cell_type = TH
        end
        parsed_token[0] = token_str.sub(MODIFIED_CELL_PAT,"")
        @rowspan = cell_modifiers.count(ROW_EXPANDER) + 1
        @colspan = cell_modifiers.count(COL_EXPANDER) + 1
      end
      parsed_token
    end

    # The first element pushed into the cell goes through
    # #parse_first_token; later elements are pushed unchanged.
    def push(token)
      if self.empty?
        super(parse_first_token(token))
      else
        super(token)
      end
    end
  end

  # A TableSep closes the current node and opens the next cell; every
  # other token is handled as in InlineParser.
  def treated_as_node_end(token)
    if token == TableSep
      self.pop
      return (self.push TableCellNode.new)
    end

    super(token)
  end

  # Opens the first cell, then parses the tokens as InlineParser does.
  def parse
    self.push TableCellNode.new
    super
  end
end
167
+
168
+ include InlineParser::InlineElement
169
+ end
@@ -0,0 +1,235 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pseudohiki/inlineparser'
4
+ require 'pseudohiki/blockparser'
5
+ require 'ostruct'
6
+
7
+ module PseudoHiki
8
# Visitor that renders a parsed tree as plain text.
#
# A table of formatter objects, keyed by node class, is shared by all
# formatters (see .create). Visiting a node looks up the formatter for
# the class of each child (falling back to the one for PlainNode) and
# collects the results in a Node, an Array whose #to_s joins its items.
class PlainTextFormat
  include InlineParser::InlineElement
  include BlockParser::BlockElement

  DescSep = [InlineParser::DescSep]

  # Result container: an Array that stringifies by joining its elements.
  class Node < Array
    def to_s
      self.join("")
    end
  end

  # Returns the empty container that #visit fills; +tree+ is not used by
  # this implementation.
  def create_self_element(tree=nil)
    Node.new
  end

  # Lets +node+ accept the formatter registered for its class, or the
  # PlainNode formatter when none is registered.
  def visited_result(node)
    visitor = @formatter[node.class]||@formatter[PlainNode]
    node.accept(visitor)
  end

  # Appends the visited result of every item of +tree+ to +element+.
  def push_visited_results(element, tree)
    tree.each {|token| element.push visited_result(token) }
  end

  # Default rendering: a Node holding the visited results of the children.
  def visit(tree)
    element = create_self_element(tree)
    push_visited_results(element, tree)
    element
  end

  # formatter:: the shared node-class => formatter table
  # options::   option hash, exposed to the formatters as an OpenStruct;
  #             a block, if given, returns a hash that is merged over it.
  #             The caller's hash is left untouched.
  def initialize(formatter={}, options = { :verbose_mode=> false })
    @formatter = formatter
    options = options.merge(yield) if block_given?
    @options = OpenStruct.new(options)
  end

  # Builds a formatter with a fully populated table: the listed node
  # classes get a plain instance of this class, and the classes that need
  # special treatment get their dedicated formatter subclass.
  def self.create(options = { :verbose_mode => false })
    formatter = {}
    main = self.new(formatter, options)

    [
     PlainNode,
     InlineNode,
     EmNode,
     StrongNode,
     PluginNode,
     VerbatimLeaf,
     QuoteLeaf,
     TableLeaf,
     CommentOutLeaf,
     HeadingLeaf,
     ParagraphLeaf,
     HrLeaf,
     BlockNodeEnd,
     ListLeaf,
     EnumLeaf,
     DescNode,
     QuoteNode,
     HeadingNode,
     HrNode,
     ListNode,
     EnumNode,
     ListWrapNode,
     EnumWrapNode
    ].each do |node_class|
      formatter[node_class] = self.new(formatter, options)
    end

    formatter[InlineLeaf] = InlineLeafFormatter.new(formatter, options)
    formatter[LinkNode] = LinkNodeFormatter.new(formatter, options)
    formatter[DelNode] = DelNodeFormatter.new(formatter, options)
    formatter[DescLeaf] = DescLeafFormatter.new(formatter, options)
    formatter[VerbatimNode] = VerbatimNodeFormatter.new(formatter, options)
    formatter[TableNode] = TableNodeFormatter.new(formatter, options)
    formatter[CommentOutNode] = CommentOutNodeFormatter.new(formatter, options)
    formatter[ParagraphNode] = ParagraphNodeFormatter.new(formatter, options)
    main
  end

  # The formatter registered for PlainNode.
  def get_plain
    @formatter[PlainNode]
  end

  # Renders +tree+ with the PlainNode formatter and returns one String.
  def format(tree)
    formatter = get_plain
    tree.accept(formatter).join("")
  end

  ## Definitions of subclasses of PlainTextFormat begins here.

  # A leaf is rendered as its joined content.
  class InlineLeafFormatter < self
    def visit(leaf)
      leaf.join("")
    end
  end

  # Renders a link as its caption, or as the link target when there is
  # no caption.
  class LinkNodeFormatter < self
    # Works on a copy of +tree+. The part before the link separator is the
    # caption, the last remaining item is the reference. For a reference
    # with an image suffix the joined caption (or joined target) is
    # pushed; otherwise the caption (or joined target) is pushed and, in
    # verbose mode with a caption, the target follows in parentheses.
    # A link with nothing after the caption is reported on STDERR.
    def visit(tree)
      tree = tree.dup
      element = Node.new
      caption = get_caption(tree)
      if tree.empty?
        STDERR.puts "No uri is specified for #{caption}"
        ref = nil
      else
        ref = tree.last.join("")
      end
      if ImageSuffix =~ ref
        element.push((caption||tree).join(""))
      else
        element.push caption||tree.join("")
        element.push " (#{tree.join('')})" if @options.verbose_mode and caption
      end
      element
    end

    # Removes the caption part and the link separator from +tree+ (which
    # is modified) and returns the visited results of the caption items,
    # or nil when +tree+ holds no link separator.
    def get_caption(tree)
      link_sep_index = tree.find_index([LinkSep])
      return nil unless link_sep_index
      caption_part = tree.shift(link_sep_index)
      tree.shift
      caption_part.map {|element| visited_result(element) }
    end
  end

  # Deleted text is dropped, or shown as "[deleted:...]" in verbose mode.
  class DelNodeFormatter < self
    def visit(tree)
      return "" unless @options.verbose_mode
      "[deleted:#{tree.map {|token| visited_result(token) }.join}]"
    end
  end

  # Renders a description line: the term (the part before the description
  # separator), then ":\t" (just "\t" when there is no term) and the
  # left-stripped description, the latter two only when the description
  # is not empty. Works on a copy of +tree+.
  class DescLeafFormatter < self
    def visit(tree)
      tree = tree.dup
      element = create_self_element(tree)
      dt_sep_index = tree.index(DescSep)
      if dt_sep_index
        push_visited_results(element, tree.shift(dt_sep_index))
        tree.shift
      end
      dd = tree.map {|token| visited_result(token) }.join("").lstrip
      unless dd.empty?
        element.push element.empty? ? "\t" : ":\t"
        element.push dd
      end
      element
    end
  end

  # Verbatim content is passed through as its joined text.
  class VerbatimNodeFormatter < self
    def visit(tree)
      tree.join("")
    end
  end

  # Renders a table as tab-separated lines, expanding spanned cells.
  class TableNodeFormatter < self
    class MalFormedTableError < StandardError; end
    ERROR_MESSAGE = <<ERROR_TEXT
!! A malformed row is found: %s.
!! Please recheck if it is really what you want.
ERROR_TEXT

    # Lays the cells of +tree+ out on a grid that is as wide as the
    # largest sum of colspans found in a row, then joins each grid row
    # with tabs and terminates it with $/.
    #
    # Cells are consumed from copies of the rows, so +tree+ itself is not
    # modified. A row that runs out of cells, or any other error while
    # placing a cell, is reported on STDERR and skipped unless the
    # strict_mode option is set, in which case the error is re-raised.
    # An empty table renders as "".
    def visit(tree)
      return "" if tree.empty?

      table = create_self_element(tree)
      rows = tree.map {|row| row.dup }
      rows.length.times { table.push Node.new }
      max_col = tree.map{|row| row.reduce(0) {|sum, cell| sum + cell.colspan }}.max - 1
      max_row = rows.length - 1
      cur_row = nil
      each_cell_with_index(table, max_row, max_col) do |cell, r, c|
        cur_row = rows.shift if c == 0
        # Already filled by the span of an earlier cell.
        next if table[r][c]
        unless cell
          begin
            raise MalFormedTableError.new(ERROR_MESSAGE%[table[r].inspect]) if cur_row.empty?
            table[r][c] = cur_row.shift
            fill_expand(table, r, c, table[r][c])
          rescue
            raise if @options.strict_mode
            STDERR.puts ERROR_MESSAGE%[table[r].inspect]
            next
          end
        end
      end
      table.map {|row| row.join("\t")+$/ }.join("")
    end

    # Yields table[r][c], r and c for every position of the rectangle
    # from (initial_row, initial_col) to (max_row, max_col), row by row.
    def each_cell_with_index(table, max_row, max_col, initial_row=0, initial_col=0)
      initial_row.upto(max_row) do |r|
        initial_col.upto(max_col) do |c|
          yield table[r][c], r, c
        end
      end
    end

    # Writes +cur_cell+ into the grid: its rendered text (left-stripped,
    # chomped) goes to the top-left position of the area it spans. The
    # other positions of that area get a filler: "" normally; in verbose
    # mode "==" for positions in the cell's own row and "||" for
    # positions in the rows below.
    def fill_expand(table, initial_row, initial_col, cur_cell)
      row_expand, col_expand = "", ""
      row_expand, col_expand = "||", "==" if @options.verbose_mode
      max_row = initial_row + cur_cell.rowspan - 1
      max_col = initial_col + cur_cell.colspan - 1
      each_cell_with_index(table, max_row, max_col,
                           initial_row, initial_col) do |cell, r, c|
        if initial_row == r and initial_col == c
          table[r][c] = visited_result(cur_cell).join.lstrip.chomp
          next
        end
        table[r][c] = initial_row == r ? col_expand : row_expand
      end
    end
  end

  # Commented-out blocks produce no output.
  class CommentOutNodeFormatter < self
    def visit(tree); ""; end
  end

  # A paragraph is its joined content followed by $/.
  class ParagraphNodeFormatter < self
    def visit(tree)
      super(tree).join+$/
    end
  end
end
235
+ end
@@ -0,0 +1,119 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Builds a tree of nested nodes and leaves while keeping a stack of the
# nodes that are currently open.
#
# The bottom of the stack is the root node (see #tree); the top is the
# node that receives whatever is pushed next (see #current_node).
# What #push does with an object is decided by the object itself through
# its #push_self method: nodes become a child of the current node and the
# new current node, leaves become a child only, and a NodeEnd closes the
# current node.
class TreeStack

  # Raised by LeafType#merge when the argument is not a Leaf.
  # It is a StandardError so that a plain `rescue` can handle it.
  class NotLeafError < StandardError; end

  # Marker module: only leaves that are Mergeable are merged by
  # LeafType#merge.
  module Mergeable; end

  # a class that includes NodeType is expected to have #push method to include child nodes,
  # and a class that includes LeafType module is expected to have #concat method.

  module TreeElement
    attr_accessor :depth

    # Visitor entry point: returns visitor.visit(self).
    def accept(visitor)
      visitor.visit(self)
    end
  end

  module NodeType
    # Records the depth (one more than the current depth of +stack+) and
    # makes this node a child of the current node and the new current
    # node. Returns nil.
    def push_self(stack)
      @depth = stack.current_depth + 1
      stack.push_as_child_node self
      nil
    end
  end

  module LeafType
    # Records the depth (one more than the current depth of +stack+) and
    # adds this leaf to the current node. Returns the leaf itself.
    def push_self(stack)
      @depth = stack.current_depth + 1
      stack.push_as_leaf self
      self
    end

    # Concatenates +leaf+ onto this leaf when +leaf+ is Mergeable and
    # returns nil without merging when it is not.
    # Raises NotLeafError when +leaf+ is not a Leaf at all.
    def merge(leaf)
      raise NotLeafError, "#{leaf.class} is not a leaf" unless leaf.kind_of? Leaf
      return nil unless leaf.kind_of? Mergeable
      self.concat(leaf)
    end
  end

  class Node < Array
    include TreeElement
    include NodeType
  end

  class Leaf < Array
    include TreeElement
    include LeafType

    # Returns a new leaf holding +content+, or an empty leaf when
    # +content+ is nil or false.
    def self.create(content=nil)
      self.new.tap {|leaf| leaf.push content if content }
    end
  end

  # Pushing a NodeEnd closes the current node.
  class NodeEnd
    def push_self(stack)
      stack.pop
      nil
    end
  end
  attr_reader :node_end, :last_leaf

  # Starts the stack with +root_node+ at the bottom. The root is given a
  # singleton #depth that always returns 0.
  def initialize(root_node=Node.new)
    @stack = [root_node]
    @node_end = NodeEnd.new
    def root_node.depth
      0
    end
  end

  # The node on top of the stack.
  def current_node
    @stack.last
  end

  # The root node.
  def tree
    @stack[0]
  end

  # Pushes +node+ by calling its #push_self and remembers the value that
  # call returns as #last_leaf (the leaf for a leaf, nil for a node or a
  # NodeEnd). Returns +node+.
  def push(node=Node.new)
    @last_leaf = node.push_self(self)
    node
  end

  # Closes the current node and returns it. The root node is never
  # removed; nil is returned when only the root is left.
  def pop
    @stack.pop if @stack.length > 1
  end
  alias return_to_previous_node pop

  # Depth of the current node.
  def current_depth
    @stack.last.depth
  end

  # Adds +node+ to the current node and makes it the new current node.
  def push_as_child_node(node)
    @stack.last.push node
    @stack.push node
  end

  # Adds +node+ to the current node without changing the stack.
  def push_as_leaf(node)
    @stack.last.push node
  end

  # Pushes +sibling_node+ (by default a new instance of the current
  # node's class). When it is a NodeType the current node is closed
  # first, so that the new node is added next to it. Returns the pushed
  # object.
  def push_as_sibling(sibling_node=nil)
    sibling_node ||= current_node.class.new
    pop if sibling_node.kind_of? NodeType
    push(sibling_node)
    sibling_node
  end

  # Closes the current node and also removes the last child of the node
  # below it. Returns what #pop returned.
  def remove_current_node
    removed_node = self.pop
    self.current_node.pop
    removed_node
  end

  # Visitor entry point for the whole tree: returns visitor.visit(tree).
  def accept(visitor)
    visitor.visit(tree)
  end
end
@@ -0,0 +1,3 @@
1
module PseudoHiki
  # Version string of this release.
  VERSION = "0.0.0.4.develop"
end
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "pseudohiki/htmlformat"
4
+ require "pseudohiki/plaintextformat"
5
+ require "pseudohiki/version"
6
+