boilerpipe-ruby 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +27 -6
- data/Rakefile +8 -0
- data/boilerpipe-ruby.gemspec +10 -9
- data/lib/boilerpipe.rb +30 -0
- data/lib/boilerpipe/document/text_block.rb +113 -0
- data/lib/boilerpipe/document/text_document.rb +44 -0
- data/lib/boilerpipe/errors.rb +1 -0
- data/lib/boilerpipe/extractors/article_extractor.rb +52 -0
- data/lib/boilerpipe/filters/block_proximity_fusion.rb +63 -0
- data/lib/boilerpipe/filters/boilerplate_block_filter.rb +26 -0
- data/lib/boilerpipe/filters/document_title_match_classifier.rb +121 -0
- data/lib/boilerpipe/filters/expand_title_to_content_filter.rb +43 -0
- data/lib/boilerpipe/filters/heuristic_filter_base.rb +7 -0
- data/lib/boilerpipe/filters/ignore_blocks_after_content_filter.rb +24 -0
- data/lib/boilerpipe/filters/keep_largest_block_filter.rb +62 -0
- data/lib/boilerpipe/filters/large_block_same_tag_level_to_content_filter.rb +29 -0
- data/lib/boilerpipe/filters/list_at_end_filter.rb +25 -0
- data/lib/boilerpipe/filters/num_words_rules_classifier.rb +42 -0
- data/lib/boilerpipe/filters/terminating_blocks_finder.rb +44 -0
- data/lib/boilerpipe/filters/trailing_headline_to_boilerplate_filter.rb +24 -0
- data/lib/boilerpipe/labels/default.rb +17 -0
- data/lib/boilerpipe/labels/label_action.rb +17 -0
- data/lib/boilerpipe/sax/boilerpipe_html_parser.rb +24 -0
- data/lib/boilerpipe/sax/html_content_handler.rb +275 -0
- data/lib/boilerpipe/sax/tag_action_map.rb +51 -0
- data/lib/boilerpipe/sax/tag_actions/anchor_text.rb +49 -0
- data/lib/boilerpipe/sax/tag_actions/block_level.rb +17 -0
- data/lib/boilerpipe/sax/tag_actions/block_tag_label.rb +22 -0
- data/lib/boilerpipe/sax/tag_actions/body.rb +21 -0
- data/lib/boilerpipe/sax/tag_actions/chained.rb +20 -0
- data/lib/boilerpipe/sax/tag_actions/font.rb +40 -0
- data/lib/boilerpipe/sax/tag_actions/ignorable_element.rb +18 -0
- data/lib/boilerpipe/sax/tag_actions/inline_no_whitespace.rb +16 -0
- data/lib/boilerpipe/sax/tag_actions/inline_tag_label.rb +24 -0
- data/lib/boilerpipe/sax/tag_actions/inline_whitespace.rb +18 -0
- data/lib/boilerpipe/util/unicode_tokenizer.rb +2 -2
- data/lib/boilerpipe/version.rb +1 -1
- data/stuff.txt +4 -0
- metadata +61 -15
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Boilerpipe::Labels
  # Holds a list of labels and knows how to hand that list to a text block.
  class LabelAction
    # The labels this action carries, exactly as passed to the constructor.
    attr_reader :labels

    # labels - the collection of labels to carry (defaults to an empty Array).
    def initialize(labels=[])
      @labels = labels
    end

    # Passes the stored labels to text_block via its #add_labels method.
    #
    # Returns whatever text_block.add_labels returns.
    def add_to(text_block)
      text_block.add_labels(labels)
    end

    # Returns the labels joined by commas, e.g. "H1,HEADING".
    def to_s
      labels.join(',')
    end
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
module Boilerpipe::SAX
  # Entry point that turns a raw HTML string into a text document by
  # cleaning the markup and feeding it through HTMLContentHandler.
  class BoilerpipeHTMLParser
    # A complete <script> element: optional attributes, a body that may span
    # several lines (m flag) and may be empty, and a tolerant closing tag.
    SCRIPT_ELEMENT = /<script\b[^>]*>.*?<\/script\s*>/im

    # An &nbsp entity written without its terminating semicolon and followed
    # by a space.
    UNTERMINATED_NBSP = /(&nbsp) /

    # Parses an HTML string.
    #
    # text - String containing the HTML source. It is not modified.
    #
    # Returns the value of HTMLContentHandler#text_document once the SAX
    # parse has finished.
    def self.parse(text)
      # script bug - delete script elements before the parser sees them
      text = text.gsub(SCRIPT_ELEMENT, '')

      # nokogiri uses libxml for mri and nekohtml for jruby
      # mri doesn't remove &nbsp when missing the semicolon, so add it
      text = text.gsub(UNTERMINATED_NBSP, '\1; ')

      # use nokogiri to fix any bad tags, errors - keep experimenting with this
      text = Nokogiri::HTML(text).to_html

      handler = HTMLContentHandler.new
      Nokogiri::HTML::SAX::Parser.new(handler).parse(text)
      handler.text_document
    end
  end
end
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
require 'set'
|
|
3
|
+
|
|
4
|
+
module Boilerpipe::SAX
  # Nokogiri SAX document handler that converts HTML parse events into a
  # list of Boilerpipe::Document::TextBlock objects.
  #
  # Character data is accumulated in two parallel buffers: @text_buffer holds
  # the visible text, while @token_buffer holds the same text plus any extra
  # tokens added through #append_token (the anchor sentinels below). When a
  # tag action, or a tag with no registered action, requests a flush, the
  # buffers are converted into a single TextBlock by #flush_block.
  class HTMLContentHandler < Nokogiri::XML::SAX::Document
    attr_reader :in_ignorable_element, :label_stacks, :last_start_tag

    attr_accessor :in_anchor_tag, :token_buffer, :font_size_stack

    # Sentinel tokens that bracket link text inside the token buffer so that
    # #flush_block can count linked words.
    ANCHOR_TEXT_START = "$\ue00a<"
    ANCHOR_TEXT_END = ">\ue00a$"

    # unicode regex - categories
    # \p{L} -- Letter
    # \p{Nd} -- a decimal digit
    # \p{Nl} -- a letterlike numeric character
    # \p{No} -- a numeric character of other type
    VALID_WORD_CHARACTER = /[\p{L}\p{Nd}\p{Nl}\p{No}]/

    # Column at which #flush_block considers a line of text to wrap.
    MAX_LINE_LENGTH = 80

    def initialize
      @label_stacks = []
      @tag_actions = ::Boilerpipe::SAX::TagActionMap.tag_actions
      @tag_level = 0
      @sb_last_was_whitespace = false
      @text_buffer = ''
      @token_buffer = ''
      @offset_blocks = 0
      @flush = false
      @block_tag_level = -1

      @in_body = 0
      @in_anchor_tag = 0
      @in_ignorable_element = 0
      @in_anchor_text = false
      # not read inside this class; exposed through attr_accessor
      @font_size_stack = []
      @last_start_tag = ''
      @title = nil
      @text_blocks = []
    end

    # SAX callback for an opening tag.
    #
    # Pushes an empty slot on the label stack for this element, then lets the
    # registered tag action (if any) decide whether the tag level changes and
    # whether the pending text must be flushed. Tags without a registered
    # action always raise the tag level and request a flush.
    def start_element(name, attrs = [])
      @label_stacks << nil
      tag = name.upcase.intern

      tag_action = @tag_actions[tag]
      if tag_action
        @tag_level += 1 if tag_action.changes_tag_level?
        # `|` keeps an already requested flush and normalises nil to false
        @flush = tag_action.start(self, name, attrs) | @flush
      else
        @tag_level += 1
        @flush = true
      end

      @last_event = :START_TAG
      @last_start_tag = tag
    end

    # SAX callback for character data.
    #
    # Runs of whitespace are collapsed to one space; leading and trailing
    # whitespace is re-added as at most one space through #append_space.
    # Text is dropped while inside an ignorable element.
    def characters(text)
      flush_block if @flush

      return if @in_ignorable_element != 0
      return if text.empty?

      # replace all whitespace with simple space; work on a copy so the
      # string handed over by the parser is never mutated
      text = text.gsub(/\s+/, ' ')

      # trim whitespace (after the collapse above it can only be a space)
      started_with_whitespace = text.start_with?(' ')
      ended_with_whitespace = text.end_with?(' ')
      text = text.strip

      # add a single space if the block was only whitespace
      if text.empty?
        append_space
        @last_event = :WHITESPACE
        return
      end

      # set block levels: the first real text of a block fixes its tag level
      @block_tag_level = @tag_level if @block_tag_level == -1

      append_space if started_with_whitespace
      append_text(text)
      append_space if ended_with_whitespace

      @last_event = :CHARACTERS
    end

    # SAX callback for a closing tag. Mirrors #start_element: consults the
    # tag action, lowers the tag level when appropriate, flushes if asked
    # to, and finally pops this element's slot off the label stack.
    def end_element(name)
      tag = name.upcase.intern
      tag_action = @tag_actions[tag]
      if tag_action
        @flush = tag_action.end_tag(self, name) | @flush
      else
        @flush = true
      end

      @tag_level -= 1 if tag_action.nil? || tag_action.changes_tag_level?
      flush_block if @flush

      @last_event = :END_TAG
      @last_end_tag = tag
      @label_stacks.pop
    end

    # Converts the buffered text into a TextBlock and resets the buffers.
    #
    # Outside of <body> the buffer is only used to capture the document
    # title (when the last opened tag was TITLE) and is then discarded.
    # An empty buffer, or one holding just a single space, produces no
    # block. Otherwise the tokens are counted (words, linked words, wrapped
    # lines) and a block is appended via #add_text_block.
    def flush_block
      @flush = false
      if @in_body == 0
        @title = @token_buffer.strip if :TITLE == @last_start_tag
        clear_buffers
        return
      end

      # clear out if empty or just a space
      return if @token_buffer.empty?

      if @token_buffer.size == 1 && @sb_last_was_whitespace
        clear_buffers
        return
      end
      # A single non-whitespace character is real content and falls through
      # to normal processing instead of lingering in the buffer.

      num_tokens = 0
      num_words = 0
      num_words_current_line = 0
      num_wrapped_lines = 0
      num_linked_words = 0
      current_line_length = -1 # don't count the first space

      tokens = ::Boilerpipe::UnicodeTokenizer.tokenize(@token_buffer)
      tokens.each do |token|
        if ANCHOR_TEXT_START == token
          @in_anchor_text = true
        elsif ANCHOR_TEXT_END == token
          @in_anchor_text = false
        elsif is_word?(token)
          num_tokens += 1
          num_words += 1
          num_words_current_line += 1
          num_linked_words += 1 if @in_anchor_text
          token_length = token.size
          current_line_length += token_length + 1

          # the word does not fit: it starts a new (virtual) line
          if current_line_length > MAX_LINE_LENGTH
            num_wrapped_lines += 1
            current_line_length = token_length
            num_words_current_line = 1
          end
        else
          num_tokens += 1
        end
      end

      return if num_tokens == 0

      if num_wrapped_lines == 0
        num_words_in_wrapped_lines = num_words
        num_wrapped_lines = 1
      else
        # words on the last, unfinished line are not counted as wrapped
        num_words_in_wrapped_lines = num_words - num_words_current_line
      end

      text_block = ::Boilerpipe::Document::TextBlock.new(@text_buffer.strip,
                                                         num_words,
                                                         num_linked_words,
                                                         num_words_in_wrapped_lines,
                                                         num_wrapped_lines, @offset_blocks)

      @offset_blocks += 1
      clear_buffers
      text_block.set_tag_level(@block_tag_level)
      add_text_block(text_block)
      @block_tag_level = -1
    end

    # Flushes any pending text and returns a TextDocument built from the
    # captured title and the collected text blocks.
    def text_document
      flush_block
      ::Boilerpipe::Document::TextDocument.new(@title, @text_blocks)
    end

    # Returns the current length of the token buffer.
    def token_buffer_size
      @token_buffer.size
    end

    # Returns a truthy value (match index) when word contains at least one
    # letter or numeric character, nil otherwise.
    def is_word?(word)
      word =~ VALID_WORD_CHARACTER
    end

    # Reference: local variable initialisation of the upstream Java method
    # this class was ported from.
    #public void flushBlock() {
    #  int numWords = 0;
    #  int numLinkedWords = 0;
    #  int numWrappedLines = 0;
    #  int currentLineLength = -1; // don't count the first space
    #  final int maxLineLength = 80;
    #  int numTokens = 0;
    #  int numWordsCurrentLine = 0;
    #}

    def increase_in_ignorable_element!
      @in_ignorable_element += 1
    end

    def decrease_in_ignorable_element!
      @in_ignorable_element -= 1
    end

    def increase_in_body!
      @in_body += 1
    end

    def decrease_in_body!
      @in_body -= 1
    end

    def in_ignorable_element?
      @in_ignorable_element > 0
    end

    def in_anchor_tag?
      @in_anchor_tag > 0
    end

    # Applies the labels of every label action on the label stacks to
    # text_block, then stores the block.
    #
    # NOTE(review): this passes the whole labels collection to #add_label,
    # whereas LabelAction#add_to uses #add_labels. TextBlock is not visible
    # from here -- confirm which call is intended.
    def add_text_block(text_block)
      @label_stacks.each do |stack|
        next unless stack

        stack.each do |label_action|
          text_block.add_label(label_action.labels) if label_action
        end
      end
      @text_blocks << text_block
    end

    # append space if last character wasn't already one
    def append_space
      return if @sb_last_was_whitespace
      @sb_last_was_whitespace = true

      @text_buffer << ' '
      @token_buffer << ' '
    end

    # Appends visible text to both buffers.
    def append_text(text)
      @sb_last_was_whitespace = false
      @text_buffer << text
      @token_buffer << text
    end

    # Appends a token to the token buffer only; the visible text is left
    # untouched.
    def append_token(token)
      @token_buffer << token
    end

    # Registers label_action on the label stack slot of the innermost open
    # element, creating the slot's Array on first use.
    def add_label_action(label_action)
      label_stack = @label_stacks.last
      if label_stack.nil?
        label_stack = []
        @label_stacks.pop
        @label_stacks << label_stack
      end
      label_stack << label_action
    end

    private

    # Replaces both buffers with fresh empty strings.
    def clear_buffers
      @token_buffer = ''
      @text_buffer = ''
    end
  end
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
module Boilerpipe::SAX
  # Builds the default mapping from upper-case tag name symbols to the tag
  # action objects the content handler consults for each element.
  class TagActionMap
    # Returns a new Hash (Symbol => tag action) on every call; each key gets
    # its own freshly created action instance.
    def self.tag_actions
      labels = ::Boilerpipe::Labels
      actions = {}

      # elements whose content is skipped
      [:STYLE, :SCRIPT, :OPTION, :OBJECT, :EMBED, :APPLET, :LINK].each do |tag|
        actions[tag] = TagActions::IgnorableElement.new
      end

      actions[:A] = TagActions::AnchorText.new
      actions[:BODY] = TagActions::Body.new

      inline_tags = [:STRIKE, :U, :B, :I, :EM, :STRONG, :SPAN]
      # New in 1.1 (especially to improve extraction quality from Wikipedia etc.)
      inline_tags += [:SUP]
      # New in 1.2
      inline_tags += [:CODE, :TT, :SUB, :VAR]
      inline_tags.each do |tag|
        actions[tag] = TagActions::InlineNoWhitespace.new
      end

      [:ABBR, :ACRONYM].each do |tag|
        actions[tag] = TagActions::InlineWhitespace.new
      end
      actions[:FONT] = TagActions::InlineNoWhitespace.new

      # added in 1.1.1
      actions[:NOSCRIPT] = TagActions::IgnorableElement.new

      # New in 1.3
      block_labels = [
        [:LI, [:LI]],
        [:H1, [:H1, :HEADING]],
        [:H2, [:H2, :HEADING]],
        [:H3, [:H3, :HEADING]]
      ]
      block_labels.each do |tag, tag_labels|
        actions[tag] = TagActions::BlockTagLabel.new(labels::LabelAction.new(tag_labels))
      end

      actions
    end
  end
end
|
|
51
|
+
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
module Boilerpipe::SAX::TagActions
  # Tag action for anchor elements (this should usually only be set for the
  # <A> tag). It tracks anchor nesting depth on the handler and brackets the
  # link text in the token buffer with the handler's ANCHOR_TEXT_START /
  # ANCHOR_TEXT_END sentinel tokens.
  #
  # Anchor tags may not be nested. If a nested anchor is encountered anyway
  # (e.g. because of a parser bug), this action recovers silently instead of
  # raising.
  class AnchorText
    # Called when the tag opens.
    #
    # handler - the content handler; must respond to in_anchor_tag,
    #           in_anchor_tag=, in_anchor_tag?, in_ignorable_element?,
    #           append_space and append_token.
    # name    - the tag name as reported by the parser.
    # attrs   - the tag attributes (unused).
    #
    # Returns false on every path: an anchor never forces a block flush.
    def start(handler, name, attrs)
      nested = handler.in_anchor_tag?
      handler.in_anchor_tag += 1

      if nested
        nested_anchor_tag_error_recovering(handler, name)
        return false
      end

      append_anchor_text_start(handler) unless handler.in_ignorable_element?
      false
    end

    # Called when the tag closes. Emits the end sentinel only when the
    # outermost anchor closes and the handler is not inside an ignorable
    # element.
    #
    # Returns false.
    def end_tag(handler, name)
      handler.in_anchor_tag -= 1
      append_anchor_text_end(handler) unless handler.in_anchor_tag? || handler.in_ignorable_element?
      false
    end

    def changes_tag_level?
      true
    end

    # Writes the start sentinel, surrounded by spaces, to the token buffer.
    def append_anchor_text_start(handler)
      handler.append_space
      handler.append_token(Boilerpipe::SAX::HTMLContentHandler::ANCHOR_TEXT_START)
      handler.append_token(' ')
    end

    # Writes the end sentinel, surrounded by spaces, to the token buffer.
    def append_anchor_text_end(handler)
      handler.append_space
      handler.append_token(Boilerpipe::SAX::HTMLContentHandler::ANCHOR_TEXT_END)
      handler.append_token(' ')
    end

    # Recovery for a nested <A>: treat the new opening tag as if the
    # previous anchor had been closed.
    #
    # - dunno about nokogiri???????
    # as nested A elements are not allowed per specification, we
    # are probably reaching this branch due to a bug in the XML parser
    #puts "Warning: SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow..."
    def nested_anchor_tag_error_recovering(handler, name)
      end_tag(handler, name)
    end

    # Original (misspelled) name, kept so existing callers keep working.
    alias_method :nested_achor_tag_error_recovering, :nested_anchor_tag_error_recovering
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Boilerpipe::SAX::TagActions
  # Tag action for a plain "block-level" element: both its opening and its
  # closing tag answer true, and it counts towards the tag nesting level.
  class BlockLevel
    # Returns true for every opening tag; the arguments are not inspected.
    def start(handler, name, attrs)
      true
    end

    # Returns true for every closing tag; the arguments are not inspected.
    def end_tag(handler, name)
      true
    end

    # Block-level elements always take part in tag-level counting.
    def changes_tag_level?
      true
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
module Boilerpipe::SAX::TagActions
  # Tag action for block-level elements that additionally registers a label
  # action with the handler whenever the element opens.
  class BlockTagLabel
    # label_action - the object handed to handler.add_label_action on start.
    def initialize(label_action)
      @label_action = label_action
    end

    # Registers the stored label action on the handler.
    #
    # Returns true.
    def start(handler, name, attrs)
      handler.add_label_action(@label_action)
      true
    end

    # Returns true; nothing else happens when the element closes.
    def end_tag(handler, name)
      true
    end

    # This action always takes part in tag-level counting.
    def changes_tag_level?
      true
    end
  end
end
|