mediacloth 0.0.1 → 0.0.2

@@ -16,6 +16,10 @@ class WikiAST < AST
 
 end
 
+#The node to represent paragraph with text inside
+class ParagraphAST < AST
+end
+
 #The node to represent a simple or formatted text
 #with more AST nodes inside.
 class FormattedAST < AST
@@ -31,7 +35,7 @@ end
 #The node to represent a list
 class ListAST < AST
     #Currently recognized types: :Bulleted, :Numbered
-    attr_accessor :type
+    attr_accessor :list_type
 end
 
 #The node to represent a list item
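
Call sites change accordingly: code that read node.type on a list node now reads node.list_type. A hypothetical consumer, not part of the gem, doing the same mapping the generator's list_tag hunk performs further down:

    # Hypothetical helper (illustrative only): map the renamed accessor
    # to an HTML tag name, exactly as list_tag does in the generator.
    def list_tag_for(node)
        case node.list_type      # was node.type in 0.0.1
        when :Bulleted then "ul"
        when :Numbered then "ol"
        end
    end
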
@@ -23,6 +23,12 @@ protected
         super(ast)
     end
 
+    def parse_paragraph(ast)
+        @html += "<p>"
+        super(ast)
+        @html += "</p>"
+    end
+
     def parse_text(ast)
         tag = formatting_to_tag(ast)
         if tag[0].empty?
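
The net effect: every ParagraphAST is rendered inside a <p> element, with super descending into the paragraph's children as before. A minimal sketch of the end-to-end wiring; MediaWikiParser and MediaWikiHTMLGenerator are the gem's parser and HTML generator classes, but they are not shown in this diff, and the exact calls (lexer=, parse, html) are assumptions about the gem's usual usage rather than anything the diff confirms:

    # Sketch only: the wiring below is assumed, not confirmed by this diff.
    # Require the gem's lexer, parser and generator files first.
    parser = MediaWikiParser.new
    parser.lexer = MediaWikiLexer.new            # assumed accessor
    ast = parser.parse("Hello, world!\n\n")      # assumed entry point

    generator = MediaWikiHTMLGenerator.new
    generator.parse(ast)                         # walks the AST, hitting parse_paragraph
    generator.html    # expected: "<p>Hello, world!</p>"
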
@@ -95,9 +101,9 @@ private
 
     #returns a tag name of the list in ast node
     def list_tag(ast)
-        if ast.type == :Bulleted
+        if ast.list_type == :Bulleted
             return "ul"
-        elsif ast.type == :Numbered
+        elsif ast.list_type == :Numbered
             return "ol"
         end
     end
@@ -44,6 +44,7 @@ class MediaWikiLexer
         @lexer_table["-"] = method(:match_line)
         @lexer_table["~"] = method(:match_signature)
         @lexer_table["h"] = method(:match_inline_link)
+        @lexer_table["\n"] = method(:match_newline)
     end
 
     #Transforms input stream (string) into the stream of tokens.
@@ -52,6 +53,7 @@ class MediaWikiLexer
     #modification. The last token [false, false] inficates EOF.
     def tokenize(input)
         @tokens = []
+        start_para
         @cursor = 0
         @text = input
         @next_token = []
@@ -68,7 +70,27 @@ class MediaWikiLexer
                 @current_token[1] += @text[@token_start, 1]
             else
                 #skip empty :TEXT tokens
-                @tokens << @current_token unless empty_text_token?
+                unless empty_text_token?
+                    @tokens << @current_token
+                    unless para_breaker?(@next_token[0])
+                        #if no paragraph was previously started
+                        #then we should start it
+                        start_para if !@para
+                    else
+                        #if we already have a paragraph this is the time to close it
+                        end_para if @para
+                    end
+
+                end
+
+                if para_breaker?(@next_token[0])
+                    if @tokens.last and @tokens.last[0] == :PARA_START
+                        #we need to remove para start token because no para end is possible
+                        @tokens.pop
+                        @para = false
+                    end
+                end
+
                 @next_token[1] = @text[@token_start, @cursor - @token_start]
                 @tokens << @next_token
                 #hack to enable sub-lexing!
@@ -77,6 +99,10 @@ class MediaWikiLexer
                     @sub_tokens = nil
                 end
                 #end of hack!
+
+                #if the next token can start the paragraph, let's try that
+                start_para if @tokens.last and para_starter?(@tokens.last[0])
+
                 @current_token = nil
                 @next_token = []
             end
@@ -84,6 +110,13 @@ class MediaWikiLexer
         #add the last TEXT token if it exists
         @tokens << @current_token if @current_token and not empty_text_token?
 
+        #remove empty para start or finish the paragraph if necessary
+        if @tokens.last and @tokens.last[0] == :PARA_START
+            @tokens.pop
+            @para = false
+        else
+            end_para if @para
+        end
         #RACC wants us to put this to indicate EOF
         @tokens << [false, false]
         @tokens
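
Taken together, tokenize now brackets runs of text with :PARA_START/:PARA_END pairs. Roughly, for two paragraphs separated by a blank line (token values elided; an illustration, not a fixture from the gem's tests):

    lexer = MediaWikiLexer.new
    lexer.tokenize("first\n\nsecond").map { |token| token[0] }
    #=> [:PARA_START, :TEXT, :PARA_END, :PARA_START, :TEXT, :PARA_END, false]
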
@@ -98,6 +131,18 @@ class MediaWikiLexer
 
 
 private
+    #Returns true if the token breaks the paragraph.
+    def para_breaker?(token)
+        [:SECTION_START, :SECTION_END,
+         :UL_START, :UL_END, :OL_START, :OL_END,
+         :DL_START, :DL_END, :HLINE, :PRE].include?(token)
+    end
+
+    #Returns true if the paragraph can be started after the token
+    def para_starter?(token)
+        [:SECTION_END, :UL_END, :OL_END, :DL_END, :HLINE, :PRE].include?(token)
+    end
+
     #-- ================== Match methods ================== ++#
 
     #Matches anything that was not matched. Returns :TEXT to indicate
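
Both predicates are plain list lookups and easy to sanity-check (they are private, hence send; illustrative):

    lexer = MediaWikiLexer.new
    lexer.send(:para_breaker?, :HLINE)     #=> true
    lexer.send(:para_starter?, :UL_END)    #=> true
    lexer.send(:para_breaker?, :TEXT)      #=> false
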
@@ -145,17 +190,17 @@ private
     end
 
     #Matches sections
-    # "=+" { return SECTION; }
     def match_section
-        if (@text[@cursor-1, 1] == "\n") or (@pair_stack.last[0] == :SECTION)
+        if at_start_of_line? or (@pair_stack.last[0] == :SECTION_START)
             i = 0
             i += 1 while @text[@cursor+i, 1] == "="
             @cursor += i
-            @next_token[0] = :SECTION
 
-            if @pair_stack.last[0] == :SECTION
+            if @pair_stack.last[0] == :SECTION_START
+                @next_token[0] = :SECTION_END
                 @pair_stack.pop
             else
+                @next_token[0] = :SECTION_START
                 @pair_stack.push @next_token
             end
         else
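
With sections now emitting paired :SECTION_START/:SECTION_END tokens, a heading also exercises the paragraph bookkeeping above: the :PARA_START pushed at the top of tokenize is popped again because the section breaks the still-empty leading paragraph, and a fresh paragraph is opened after :SECTION_END. Roughly, assuming at_start_of_line? treats position 0 as a line start (token values elided, illustrative):

    MediaWikiLexer.new.tokenize("= Title =\nbody").map { |token| token[0] }
    #=> [:SECTION_START, :TEXT, :SECTION_END, :PARA_START, :TEXT, :PARA_END, false]
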
@@ -326,6 +371,20 @@ private
         end
     end
 
+    #Matches new line and breaks the paragraph if two newlines are met
+    def match_newline
+        if @text[@cursor, 2] == "\n\n"
+            if @para
+                @next_token[0] = :PARA_END
+                # @para = false
+                @sub_tokens = [[:PARA_START, ""]]
+                @cursor += 2
+                return
+            end
+        end
+        match_other
+    end
+
     #-- ================== Helper methods ================== ++#
 
     #Checks if the token is placed at the start of the line.
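
Only a blank line (two consecutive newlines) closes a paragraph; a single newline falls through to match_other and stays inside the current :TEXT token. Illustrative:

    MediaWikiLexer.new.tokenize("one\ntwo").map { |token| token[0] }
    #=> [:PARA_START, :TEXT, :PARA_END, false]
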
@@ -359,10 +418,16 @@ private
     end
 
     #Runs sublexer to tokenize sub_text
-    def sub_lex(sub_text)
+    def sub_lex(sub_text, strip_paragraphs=true)
         sub_lexer = MediaWikiLexer.new
         sub_tokens = sub_lexer.tokenize(sub_text)
-        sub_tokens.pop
+        sub_tokens.pop #false token
+        if strip_paragraphs
+            #the last PARA_END token
+            sub_tokens.pop if sub_tokens.last[0] == :PARA_END
+            #the first PARA_START token
+            sub_tokens.delete_at(0) if sub_tokens[0][0] == :PARA_START
+        end
         sub_tokens
     end
 
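
Because nested markup is tokenized with a fresh lexer, sub_lex now strips that inner lexer's paragraph frame so sub-lexed fragments do not leak :PARA_START/:PARA_END into the enclosing token stream. Illustrative (sub_lex is private, hence send):

    lexer = MediaWikiLexer.new
    lexer.send(:sub_lex, "caption text").map { |token| token[0] }
    #=> [:TEXT]
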
@@ -403,5 +468,15 @@ private
         list
     end
 
+    def start_para
+        @tokens << [:PARA_START, ""]
+        @para = true
+    end
+
+    def end_para
+        @tokens << [:PARA_END, ""]
+        @para = false
+    end
+
 end
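
start_para and end_para keep @para and the token stream in sync, and the cleanup at the end of tokenize guarantees a paragraph is only ever emitted as a complete pair. Boundary case, illustrative:

    # With no input there is nothing to wrap, so the :PARA_START pushed by
    # start_para is popped again and only the EOF marker remains.
    MediaWikiLexer.new.tokenize("")
    #=> [[false, false]]
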