pseudohikiparser 0.0.0.4.develop

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,155 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'htmlelement'
4
+ require 'pseudohiki/inlineparser'
5
+ require 'pseudohiki/htmlformat'
6
+ #require('hikiparser/hikiblockparser')
7
+
8
+ module PseudoHiki
9
# Reopens HtmlFormat to give the object stored under Formatter[PluginNode]
# its own #visit, so that plugin nodes are rendered through HtmlPlugin.
class HtmlFormat
  class << Formatter[PluginNode]
    # Renders a plugin leaf.
    #
    # leaf:: the visited plugin node; its joined content is handed to
    #        HtmlPlugin as the raw plugin call text.
    #
    # Returns whatever HtmlPlugin#apply returns for that call.
    def visit(leaf)
      plugin = HtmlPlugin.new(@element_name, leaf.join)
      plugin.apply
    end
  end
end
16
+
17
# Expands the text of an inline plugin call, such as "co2 :en" or
# "cb(3km)", into an HTML fragment.
#
# The text is split into a plugin name and an argument string; #apply then
# invokes the public instance method that has the plugin's name. Names that
# do not correspond to a plugin method are rendered as a generic element
# carrying the class "plugin".
class HtmlPlugin

  # Plugin name followed by whitespace and/or an opening parenthesis.
  PLUGIN_PAT = /^(\w+)([\s\(]+)/
  NUMBER_RE = /(\d+)/
  # Public instance methods that are infrastructure, not plugins; they must
  # never be reachable from plugin text.
  NON_PLUGIN_METHODS = [:parse, :apply, :method_missing].freeze

  # Splits raw plugin text into its name and its argument text.
  #
  # Sets @plugin_name (and @with_paren when the arguments were given in
  # parentheses) as a side effect.
  #
  # data:: the raw plugin call, e.g. "cb(3km)" or "per m".
  #
  # Returns the argument text with the plugin name, the separator and, for
  # the parenthesised form, the final character removed. Returns "" when
  # data consists of a bare plugin name.
  def parse(data)
    matched = PLUGIN_PAT.match(data)
    unless matched
      @plugin_name = data.chomp
      return ""
    end

    @plugin_name = matched[1]
    @with_paren = true if matched[2].chomp == "("
    result = data.chomp.sub(PLUGIN_PAT, "")
    # Drop the closing parenthesis. Slicing (rather than result[-1,1] = "")
    # also copes with an empty argument string such as "cb(".
    result = result[0...-1] if @with_paren
    result
  end

  # tag_type::    element name used when the plugin is unknown.
  # parsed_data:: the raw plugin call; converted with #to_s before parsing.
  def initialize(tag_type, parsed_data)
    @tag_type = tag_type
    @plugin_name = nil
    @with_paren = nil
    @data = parse(parsed_data.to_s)
  end

  # Runs the plugin named in the parsed call and returns its result.
  #
  # Only plugin methods (see #plugin_method?) are dispatched. The plugin
  # name comes from document text, so an unrestricted send would let a
  # document invoke methods such as exit, sleep or apply itself.
  def apply
    return public_send(@plugin_name) if plugin_method?(@plugin_name)
    render_unknown_plugin
  end

  def html
    # "<div class='raw-html'>"+HtmlElement.decode(@data)+"</div>"
    HtmlElement.decode(@data).to_s
  end

  # def inline
  #   lines = HtmlElement.decode(@data).split(/\r*\n/o)
  #   lines.shift if lines.first == ""
  #   HikiBlockParser.new.parse_lines(lines).join
  # end

  # Builds an <a> element from "name, mark"; the mark defaults to "_".
  def anchor
    name, anchor_mark = @data.split(/,\s*/o, 2)
    anchor_mark = "_" if anchor_mark.nil? || anchor_mark.empty?
    HtmlElement.create("a", anchor_mark,
                       "name" => name,
                       "href" => "#" + name)
  end

  # Defines a plugin method named after the downcased chemical formula.
  #
  # The generated plugin reads up to two display modes from its argument
  # text, separated by ", ":
  # ":cf":: the formula with every number wrapped in <sub>...</sub>
  # ":en":: the English name
  # With no argument ":cf" is used. With two modes the second rendering
  # follows the first in parentheses. An unrecognised single mode yields
  # nil.
  #
  # The method is created with define_method instead of evaluating
  # generated source, so the formula and the English name are never
  # interpreted as code.
  def HtmlPlugin.add_chemical_formula(chemical_formula="CO2", en_word="carbon dioxide")
    renderings = {
      ":cf" => chemical_formula.gsub(NUMBER_RE, "<sub>\\1</sub>"),
      ":en" => en_word
    }

    define_method(chemical_formula.downcase) do
      display, second_display = @data.split(", ")
      display ||= ":cf"
      if second_display
        [renderings[display], "(", renderings[second_display], ")"].join("")
      else
        renderings[display]
      end
    end
  end

  [
    ["SF6", "sulfur hexafluoride"],
    ["CO2", "carbon dioxide"],
    ["HFC", "hydrofluorocarbon"],
    ["PFC", "perfluorocarbon"],
    ["CFC", "chlorofluorocarbon"],
    ["CH4", "methane"],
    ["H2O", "water"],
    ["C2F5Cl", "CFC-115"]
  ].each do |chemical_formula, en|
    add_chemical_formula chemical_formula, en
  end

  def sq
    # I'm wondering if we'd be better to use &sup2; , but when we search by "km2" for example, we may have problem...
    "#{@data}<sup>2</sup>"
  end

  def cb
    # I'm wondering if we'd be better to use &sup3; , but...
    "#{@data}<sup>3</sup>"
  end

  def per
    "#{@data}<sup>-1</sup>"
  end

  def c_degree
    "&deg;C"
  end

  # Wraps every number in the argument text in <sub>...</sub>.
  def chemical_formula
    @data.gsub(NUMBER_RE, "<sub>\\1</sub>")
  end

  # Renders an isotope such as "18co2": the leading digits become a
  # superscript and the remainder is expanded as a plugin when one of that
  # name exists. Argument text that does not start with digits followed by
  # a non-digit is returned unchanged.
  def iso
    matched = /\A(\d+)([^\d].*)/.match(@data)
    return @data unless matched

    weight, molecule = matched[1], matched[2]
    if plugin_method?(molecule)
      "<sup>#{weight}</sup>" + HtmlPlugin.new("", molecule).apply
    else
      "<sup>#{weight}</sup>" + molecule
    end
  end

  alias oc c_degree

  # Fallback for any undefined method: renders the generic plugin element.
  #
  # Ruby passes the missing method's name (and the call's arguments) to
  # method_missing, so the parameters must be accepted; they stay optional
  # so that a direct call without arguments keeps working.
  # respond_to_missing? is deliberately not overridden, so respond_to?
  # still answers only for methods that really exist.
  def method_missing(name = nil, *args)
    render_unknown_plugin
  end

  private

  # True when name is a public instance method that was defined as a
  # plugin, i.e. not inherited from Object and not listed in
  # NON_PLUGIN_METHODS.
  def plugin_method?(name)
    candidates = self.class.public_instance_methods -
                 Object.public_instance_methods -
                 NON_PLUGIN_METHODS
    wanted = name.to_s
    candidates.any? {|method_name| method_name.to_s == wanted }
  end

  # Generic rendering used when no plugin method matches.
  def render_unknown_plugin
    HtmlElement.create(@tag_type, @data, "class" => "plugin")
  end
end
137
+ end
138
+
139
# Manual smoke test: prints the result of a handful of plugin calls when
# this file is executed directly.
if $0 == __FILE__
  # HtmlPlugin is defined inside the PseudoHiki module, so it has to be
  # referenced through that namespace from the top level.
  plugin_class = PseudoHiki::HtmlPlugin

  [
    "html(\n<ul>\n<li>list\n<li>list\n</ul>)",
    "inline(\n*list\n*list\n)",
    "co2",
    "co2 :en",
    "cb(3km)",
    "per m",
    "iso 18co2"
  ].each do |plugin_call|
    p plugin_class.new("div", plugin_call).apply
  end
end
@@ -0,0 +1,169 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pseudohiki/treestack'
4
+ require 'htmlelement'
5
+
6
+ module PseudoHiki
7
# Matches text that begins (at the start of a line) with an http:, https:,
# file: or ftp: scheme, or with "/" or "./".
+ PROTOCOL = /^((https?|file|ftp):|\.?\/)/
8
# Matches text that begins with a dot.
+ RELATIVE_PATH = /^\./o
9
# Matches text that begins with "/", with two backslashes, or with a drive
# letter followed by ":\".
+ ROOT_PATH = /^(\/|\\\\|[A-Za-z]:\\)/o
10
# Prefix of a file URI.
+ FILE_MARK = "file:///"
11
# Matches, case-insensitively, text that ends with a .jpg, .jpeg, .gif,
# .png or .bmp suffix.
+ ImageSuffix = /\.(jpg|jpeg|gif|png|bmp)$/io
12
+
13
# Builds a regular expression that matches any one of the given tokens.
#
# token_sets:: any number of token strings or (nested) arrays of them;
#              duplicates are ignored.
#
# The alternatives are ordered longest first (ties broken by descending
# string order) so that a longer token such as "'''" wins over its prefix
# "''". Every token is escaped and therefore matched literally.
#
# Returns the compiled Regexp.
def self.compile_token_pat(*token_sets)
  distinct_tokens = token_sets.flatten.uniq
  # The sort keys are unique after #uniq, so reversing the ascending order
  # gives exactly the descending order.
  longest_first = distinct_tokens.sort_by {|token| [token.length, token] }.reverse
  alternatives = longest_first.map {|token| Regexp.escape(token) }
  Regexp.new(alternatives.join("|"))
end
19
+
20
# Parses inline markup into a tree of nodes and leaves, using the stack
# inherited from TreeStack.
#
# The input string is cut into tokens (markup delimiters and the text
# between them). An opening delimiter pushes a new node, a closing
# delimiter pops back out of the matching node, and anything else is
# stored as a leaf.
class InlineParser < TreeStack
  module InlineElement
    class InlineNode < InlineParser::Node;end
    class InlineLeaf < InlineParser::Leaf; end
    # class LinkSepLeaf < InlineLeaf; end

    class LinkNode < InlineNode; end
    class EmNode < InlineNode; end
    class StrongNode < InlineNode; end
    class DelNode < InlineNode; end
    class PlainNode < InlineNode; end
    class PluginNode < InlineNode; end

    LinkSep, TableSep, DescSep = %w(| || :)
  end
  include InlineElement

  # opening delimiter => node class
  HEAD = {}
  # closing delimiter => node class
  TAIL = {}
  # node class => opening delimiter
  NodeTypeToHead = {}
  # parser class => token pattern used by that parser
  TokenPat = {}

  [[LinkNode, "[[", "]]"],
   [EmNode, "''", "''"],
   [StrongNode, "'''", "'''"],
   [DelNode, "==", "=="],
   [PluginNode, "{{","}}"]].each do |node_class, opening, closing|
    HEAD[opening] = node_class
    TAIL[closing] = node_class
    NodeTypeToHead[node_class] = opening
  end

  TokenPat[self] = PseudoHiki.compile_token_pat(HEAD.keys,TAIL.keys,[LinkSep, TableSep, DescSep])

  # Returns the token pattern registered for the receiver's class.
  def token_pat
    TokenPat[self.class]
  end

  # str:: the inline text to parse; it is tokenized immediately.
  def initialize(str)
    @tokens = split_into_tokens(str)
    super()
  end

  # Undoes the most recently opened node: the node is taken out of the
  # tree, its opening delimiter is put back as a plain leaf, and its former
  # children are re-attached to the node that is now current.
  def convert_last_node_into_leaf
    unclosed_node = remove_current_node
    self.push InlineLeaf.create(NodeTypeToHead[unclosed_node.class])
    unclosed_node.each {|child| self.push_as_leaf child }
  end

  # True when a node of exactly node_class is on the stack.
  def node_in_ancestors?(node_class)
    @stack.any? {|node| node_class == node.class }
  end

  # Tries to treat token as the closing delimiter of an open node.
  #
  # When the matching node is further up the stack, the nodes opened after
  # it are first converted back into leaves.
  #
  # Returns the result of #pop when a node was closed, nil otherwise.
  def treated_as_node_end(token)
    closing_class = TAIL[token]
    return self.pop if current_node.class == closing_class
    return nil unless node_in_ancestors?(closing_class)

    convert_last_node_into_leaf until current_node.class == closing_class
    self.pop
  end

  # Cuts str into an array of delimiter tokens and the text between them,
  # in order of appearance and without empty strings.
  def split_into_tokens(str)
    pieces = []
    remainder = str
    while (matched = token_pat.match(remainder))
      pieces << matched.pre_match << matched[0]
      remainder = matched.post_match
    end
    pieces << remainder
    pieces.reject {|piece| piece.empty? }
  end

  # Consumes the pending tokens and builds the tree.
  #
  # Returns the parser itself; the result is available through #tree.
  def parse
    while (token = @tokens.shift)
      # A delimiter that closes an open node is consumed by that step.
      next if TAIL[token] && treated_as_node_end(token)

      node_class = HEAD[token]
      next if node_class && self.push(node_class.new)

      self.push InlineLeaf.create(token)
    end
    self
  end

  # Parses str with a new parser and returns the resulting tree.
  def self.parse(str)
    new(str).parse.tree
  end
end
110
+
111
# Inline parser for one table row: the row is split into cells at every
# TableSep token, and leading modifier characters in a cell set its type
# and spans.
class TableRowParser < InlineParser
  module InlineElement
    class TableCellNode < InlineParser::InlineElement::InlineNode
      attr_accessor :cell_type, :rowspan, :colspan
    end
  end
  include InlineElement

  TAIL[TableSep] = TableCellNode
  TokenPat[self] = InlineParser::TokenPat[InlineParser]

  TD, TH, ROW_EXPANDER, COL_EXPANDER, TH_PAT = %w(td th ^ > !)
  # Optional "!" followed by any run of ">" and "^" at the start of a cell.
  MODIFIED_CELL_PAT = /^!?[>^]*/o

  class InlineElement::TableCellNode
    # Reads the modifiers at the start of the first token of a cell.
    #
    # Sets @cell_type (TH when the modifiers start with "!", TD otherwise),
    # @rowspan (1 plus the number of "^") and @colspan (1 plus the number
    # of ">").
    #
    # token:: the first element pushed into the cell.
    #
    # Returns a copy of token whose first element has the modifiers
    # stripped, or an unchanged copy when that element yields no match.
    def parse_first_token(token)
      @cell_type = TD
      @rowspan = 1
      @colspan = 1
      stripped_token = token.dup
      head_text = stripped_token[0]

      matched = MODIFIED_CELL_PAT.match(head_text)
      return stripped_token unless matched

      modifiers = matched[0].split(//)
      if modifiers.first == TH_PAT
        modifiers.shift
        @cell_type = TH
      end
      stripped_token[0] = head_text.sub(MODIFIED_CELL_PAT, "")
      @rowspan = modifiers.count(ROW_EXPANDER) + 1
      @colspan = modifiers.count(COL_EXPANDER) + 1
      stripped_token
    end

    # Appends token to the cell; the first element of a cell is run
    # through #parse_first_token beforehand.
    def push(token)
      token = parse_first_token(token) if self.empty?
      super(token)
    end
  end

  # A TableSep token closes the current cell and opens the next one; every
  # other token is handled as in InlineParser.
  def treated_as_node_end(token)
    return super(token) unless token == TableSep

    self.pop
    self.push TableCellNode.new
  end

  # Opens the first cell and then parses the tokens as InlineParser does.
  def parse
    self.push TableCellNode.new
    super
  end
end
167
+
168
+ include InlineParser::InlineElement
169
+ end
@@ -0,0 +1,235 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pseudohiki/inlineparser'
4
+ require 'pseudohiki/blockparser'
5
+ require 'ostruct'
6
+
7
+ module PseudoHiki
8
# Visitor that renders a parsed tree as plain text.
#
# One formatter object is registered per node class in a shared hash;
# visiting a node looks up the formatter for the node's class (falling back
# to the one for PlainNode) and lets the node accept it.
class PlainTextFormat
  include InlineParser::InlineElement
  include BlockParser::BlockElement

  DescSep = [InlineParser::DescSep]

  # Container for the pieces of rendered text of one node.
  class Node < Array
    def to_s
      self.join("")
    end
  end

  # Returns the empty container that #visit fills for tree.
  def create_self_element(tree=nil)
    Node.new
  end

  # Renders node with the formatter registered for its class, or with the
  # PlainNode formatter when none is registered.
  def visited_result(node)
    visitor = @formatter[node.class] || @formatter[PlainNode]
    node.accept(visitor)
  end

  # Renders every child of tree and appends the results to element.
  def push_visited_results(element, tree)
    tree.each {|token| element.push visited_result(token) }
  end

  # Default rendering: a Node holding the rendered children of tree.
  def visit(tree)
    element = create_self_element(tree)
    push_visited_results(element, tree)
    element
  end

  # formatter:: hash mapping node classes to formatter objects; it is
  #             shared, not copied.
  # options::   option hash, exposed to the formatter as an OpenStruct.
  #
  # When a block is given, the hash it returns is merged over options. The
  # merge builds a new hash so that the caller's hash is left untouched.
  def initialize(formatter={}, options = { :verbose_mode=> false })
    @formatter = formatter
    options = options.merge(yield) if block_given?
    @options = OpenStruct.new(options)
  end

  # Builds a complete set of formatters that share one lookup hash.
  #
  # Returns a formatter suitable for calling #format on.
  def self.create(options = { :verbose_mode => false })
    formatter = {}
    main = self.new(formatter, options)

    [
      PlainNode,
      InlineNode,
      EmNode,
      StrongNode,
      PluginNode,
      VerbatimLeaf,
      QuoteLeaf,
      TableLeaf,
      CommentOutLeaf,
      HeadingLeaf,
      ParagraphLeaf,
      HrLeaf,
      BlockNodeEnd,
      ListLeaf,
      EnumLeaf,
      DescNode,
      QuoteNode,
      HeadingNode,
      HrNode,
      ListNode,
      EnumNode,
      ListWrapNode,
      EnumWrapNode
    ].each do |node_class|
      formatter[node_class] = self.new(formatter, options)
    end

    formatter[InlineLeaf] = InlineLeafFormatter.new(formatter, options)
    formatter[LinkNode] = LinkNodeFormatter.new(formatter, options)
    formatter[DelNode] = DelNodeFormatter.new(formatter, options)
    formatter[DescLeaf] = DescLeafFormatter.new(formatter, options)
    formatter[VerbatimNode] = VerbatimNodeFormatter.new(formatter, options)
    formatter[TableNode] = TableNodeFormatter.new(formatter, options)
    formatter[CommentOutNode] = CommentOutNodeFormatter.new(formatter, options)
    formatter[ParagraphNode] = ParagraphNodeFormatter.new(formatter, options)
    main
  end

  # Returns the formatter registered for PlainNode.
  def get_plain
    @formatter[PlainNode]
  end

  # Renders tree and returns the result as one string.
  def format(tree)
    formatter = get_plain
    tree.accept(formatter).join("")
  end

  ## Definitions of subclasses of PlainTextFormat begins here.

  class InlineLeafFormatter < self
    def visit(leaf)
      leaf.join("")
    end
  end

  class LinkNodeFormatter < self
    # Renders a link as its caption, or as its target when it has no
    # caption. In verbose mode a captioned non-image link is followed by
    # the target in parentheses.
    def visit(tree)
      tree = tree.dup
      element = Node.new
      caption = get_caption(tree)
      ref = nil
      if tree.empty?
        STDERR.puts "No uri is specified for #{caption}"
      else
        # Any error raised here propagates with its original message.
        ref = tree.last.join("")
      end
      if ImageSuffix =~ ref
        element.push((caption || tree).join(""))
      else
        element.push(caption || tree.join(""))
        element.push " (#{tree.join('')})" if @options.verbose_mode and caption
      end
      element
    end

    # Removes the caption part and the separator from tree (destructively)
    # and returns the rendered caption elements, or nil when tree contains
    # no link separator.
    def get_caption(tree)
      link_sep_index = tree.find_index([LinkSep])
      return nil unless link_sep_index
      caption_part = tree.shift(link_sep_index)
      tree.shift
      caption_part.map {|element| visited_result(element) }
    end
  end

  class DelNodeFormatter < self
    # Deleted text is dropped unless verbose mode is on.
    def visit(tree)
      return "" unless @options.verbose_mode
      "[deleted:#{tree.map {|token| visited_result(token) }.join}]"
    end
  end

  class DescLeafFormatter < self
    # Renders a description item as "term:<TAB>description"; when there is
    # no term the description is preceded by a tab only.
    def visit(tree)
      tree = tree.dup
      element = create_self_element(tree)
      dt_sep_index = tree.index(DescSep)
      if dt_sep_index
        push_visited_results(element, tree.shift(dt_sep_index))
        tree.shift
      end
      dd = tree.map {|token| visited_result(token) }.join("").lstrip
      unless dd.empty?
        element.push element.empty? ? "\t" : ":\t"
        element.push dd
      end
      element
    end
  end

  class VerbatimNodeFormatter < self
    def visit(tree)
      tree.join("")
    end
  end

  class TableNodeFormatter < self
    class MalFormedTableError < StandardError; end
    ERROR_MESSAGE = "!! A malformed row is found: %s.\n" +
                    "!! Please recheck if it is really what you want.\n"

    # Renders a table as tab-separated lines, expanding cells that span
    # several rows or columns.
    #
    # Rows that do not fit the grid are reported on STDERR, or raise when
    # the strict_mode option is set.
    def visit(tree)
      table = create_self_element(tree)
      # An empty table has no column count to compute.
      return "" if tree.empty?

      # Work on copies of the rows: cells are shifted out of them below,
      # and the parsed tree must stay intact so it can be formatted again.
      rows = tree.map {|row| row.dup }
      rows.length.times { table.push Node.new }
      max_col = tree.map {|row| row.reduce(0) {|sum, cell| sum + cell.colspan } }.max - 1
      max_row = rows.length - 1
      cur_row = nil
      each_cell_with_index(table, max_row, max_col) do |cell, r, c|
        cur_row = rows.shift if c == 0
        # Already filled by the expansion of an earlier cell.
        next if table[r][c]
        unless cell
          begin
            raise MalFormedTableError.new(ERROR_MESSAGE%[table[r].inspect]) if cur_row.empty?
            table[r][c] = cur_row.shift
            fill_expand(table, r, c, table[r][c])
          rescue
            raise if @options.strict_mode
            STDERR.puts ERROR_MESSAGE%[table[r].inspect]
            next
          end
        end
      end
      table.map {|row| row.join("\t")+$/ }.join("")
    end

    # Yields every cell of the given rectangle, row by row, together with
    # its row and column index.
    def each_cell_with_index(table, max_row, max_col, initial_row=0, initial_col=0)
      initial_row.upto(max_row) do |r|
        initial_col.upto(max_col) do |c|
          yield table[r][c], r, c
        end
      end
    end

    # Writes the rendered text of cur_cell at its own position and marks
    # the other positions it spans. The marks are empty strings, or "=="
    # (same row) and "||" (rows below) in verbose mode.
    def fill_expand(table, initial_row, initial_col, cur_cell)
      row_expand, col_expand = "", ""
      row_expand, col_expand = "||", "==" if @options.verbose_mode
      max_row = initial_row + cur_cell.rowspan - 1
      max_col = initial_col + cur_cell.colspan - 1
      each_cell_with_index(table, max_row, max_col,
                           initial_row, initial_col) do |cell, r, c|
        if initial_row == r and initial_col == c
          table[r][c] = visited_result(cur_cell).join.lstrip.chomp
          next
        end
        table[r][c] = initial_row == r ? col_expand : row_expand
      end
    end
  end

  class CommentOutNodeFormatter < self
    def visit(tree); ""; end
  end

  class ParagraphNodeFormatter < self
    def visit(tree)
      super(tree).join+$/
    end
  end
end
235
+ end
@@ -0,0 +1,119 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Builds a tree with the help of a stack of the currently open nodes.
#
# Objects pushed onto the stack decide through #push_self how they are
# attached: a node becomes a child of the current node and the new current
# node, a leaf becomes a child only, and a NodeEnd closes the current node.
class TreeStack

  # Raised by LeafType#merge when the argument is not a Leaf.
  # It is a StandardError so that a plain "rescue" can handle it.
  class NotLeafError < StandardError; end

  # Marker module: only leaves that include it can be merged into another
  # leaf.
  module Mergeable; end

  # a class that includes NodeType is expected to have #push method to include child nodes,
  # and a class that includes LeafType module is expected to have #concat method.

  module TreeElement
    attr_accessor :depth

    def accept(visitor)
      visitor.visit(self)
    end
  end

  module NodeType
    # Attaches the receiver below the current node of stack and makes it
    # the current node. Returns nil, since a node is not a leaf.
    def push_self(stack)
      @depth = stack.current_depth + 1
      stack.push_as_child_node self
      nil
    end
  end

  module LeafType
    # Attaches the receiver below the current node of stack.
    # Returns the receiver.
    def push_self(stack)
      @depth = stack.current_depth + 1
      stack.push_as_leaf self
      self
    end

    # Appends the content of leaf to the receiver.
    #
    # Returns nil without changing anything when leaf is not Mergeable.
    # Raises NotLeafError when leaf is not a Leaf.
    def merge(leaf)
      raise NotLeafError unless leaf.kind_of? Leaf
      return nil unless leaf.kind_of? Mergeable
      self.concat(leaf)
    end
  end

  class Node < Array
    include TreeElement
    include NodeType
  end

  class Leaf < Array
    include TreeElement
    include LeafType

    # Returns a new leaf that holds content, or an empty leaf when content
    # is nil or false.
    def self.create(content=nil)
      self.new.tap {|leaf| leaf.push content if content }
    end
  end

  # Pushing an instance closes the current node.
  class NodeEnd
    def push_self(stack)
      stack.pop
      nil
    end
  end

  attr_reader :node_end, :last_leaf

  # root_node:: the node that becomes the root of the tree. It is given a
  #             singleton #depth method that always returns 0.
  def initialize(root_node=Node.new)
    @stack = [root_node]
    @node_end = NodeEnd.new
    def root_node.depth
      0
    end
  end

  # The innermost node that is still open.
  def current_node
    @stack.last
  end

  # The root node.
  def tree
    @stack[0]
  end

  # Attaches node according to its own #push_self and remembers that
  # method's result as #last_leaf. Returns node.
  def push(node=Node.new)
    @last_leaf = node.push_self(self)
    node
  end

  # Closes the current node and returns it. The root is never closed; nil
  # is returned when only the root is open.
  def pop
    @stack.pop if @stack.length > 1
  end
  alias return_to_previous_node pop

  def current_depth
    @stack.last.depth
  end

  # Adds node as a child of the current node and makes it current.
  def push_as_child_node(node)
    @stack.last.push node
    @stack.push node
  end

  # Adds node as a child of the current node.
  def push_as_leaf(node)
    @stack.last.push node
  end

  # Pushes sibling_node next to the current node; by default a new
  # instance of the current node's class is used. For a NodeType sibling
  # the current node is closed first. Returns the sibling.
  def push_as_sibling(sibling_node=nil)
    sibling_node ||= current_node.class.new
    pop if sibling_node.kind_of? NodeType
    push(sibling_node)
    sibling_node
  end

  # Closes the current node and detaches the last child from the node that
  # is current afterwards. Returns the closed node.
  def remove_current_node
    removed_node = self.pop
    self.current_node.pop
    removed_node
  end

  def accept(visitor)
    visitor.visit(tree)
  end
end
@@ -0,0 +1,3 @@
1
module PseudoHiki
  # Version of the pseudohikiparser package. Frozen so that the shared
  # constant cannot be modified in place.
  VERSION = "0.0.0.4.develop".freeze
end
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "pseudohiki/htmlformat"
4
+ require "pseudohiki/plaintextformat"
5
+ require "pseudohiki/version"
6
+