tagtreescanner 0.8.0 → 0.8.1
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY +4 -0
- data/Manifest.txt +1 -1
- data/{README → README.txt} +12 -14
- data/Rakefile +1 -1
- data/lib/tagtreescanner.rb +5 -153
- data/test/test_simplemarkup.rb +11 -20
- data/test/test_tagtreescanner.rb +1 -1
- metadata +3 -2
data/HISTORY
CHANGED
data/Manifest.txt
CHANGED
data/{README → README.txt}
RENAMED
@@ -1,18 +1,16 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
= TagTreeScanner
|
3
2
|
Author:: Gavin Kistner (mailto:phrogz@mac.com)
|
4
3
|
Copyright:: Copyright (c)2005-2007 Gavin Kistner
|
5
4
|
License:: MIT License
|
6
|
-
Version:: 0.8.0
|
7
|
-
|
8
|
-
= Overview
|
5
|
+
Version:: 0.8.1 (2007-November-25)
|
9
6
|
|
7
|
+
== Overview
|
10
8
|
The TagTreeScanner class provides a generic framework for creating a
|
11
9
|
nested hierarchy of tags and text (like XML or HTML) by parsing text. An
|
12
10
|
example use (and the reason it was written) is to convert a wiki markup
|
13
11
|
syntax into HTML.
|
14
12
|
|
15
|
-
|
13
|
+
== Example Usage
|
16
14
|
require 'tagtreescanner'
|
17
15
|
|
18
16
|
class SimpleMarkup < TagTreeScanner
|
@@ -90,10 +88,10 @@ syntax into HTML.
|
|
90
88
|
#=> end</preformatted>
|
91
89
|
#=> <paragraph><italic>That, as they say, is that.</italic></paragraph>
|
92
90
|
|
93
|
-
|
91
|
+
== Details
|
94
92
|
|
95
|
-
|
96
|
-
|
93
|
+
=== TagFactories at 10,000 feet
|
94
|
+
Each possible output tag is described by a TagFactory, which specifies
|
97
95
|
some or all of the following:
|
98
96
|
* The name of the tags it creates <i>(required)</i>
|
99
97
|
* The regular expression to look for to start the tag
|
@@ -105,7 +103,7 @@ some or all of the following:
|
|
105
103
|
|
106
104
|
See the TagFactory class for more information on specifying factories.
|
107
105
|
|
108
|
-
|
106
|
+
=== Genres as a State Machine
|
109
107
|
As a new tag is opened, the scanner uses the Tag#allowed_genre property
|
110
108
|
of that tag (set by the +allowed_genre+ property on the TagFactory) to
|
111
109
|
determine which tags to be looking for. A genre is specified by adding
|
@@ -122,7 +120,7 @@ for</b>. For example:
|
|
122
120
|
Note that the +close_match+ regular expression of the current tag is
|
123
121
|
always checked before looking to open/create any new tags.
|
124
122
|
|
125
|
-
|
123
|
+
=== Consuming Text
|
126
124
|
As the text is being parsed, there will (probably) be many cases where
|
127
125
|
you have raw text that doesn't close or open any new tags. Whenever the
|
128
126
|
scanner reaches this state, it runs the <tt>@text_match</tt> regexp
|
@@ -150,7 +148,7 @@ a tag factory set to look for "Hvv2vvO" to indicate a subscripted '2',
|
|
150
148
|
the entire string would be eaten as text and the subscript tag would
|
151
149
|
never start.
|
152
150
|
|
153
|
-
|
151
|
+
=== Using the Scanner
|
154
152
|
As shown in the example above, consumers of your class initialize it by
|
155
153
|
passing in the string to be parsed, and then calling #to_xml or #to_html
|
156
154
|
on it.
|
@@ -161,11 +159,11 @@ replacing special command tags with other input, or performing database
|
|
161
159
|
lookups on special wiki-page-link tags and replacing with HTML
|
162
160
|
anchors.)</i>
|
163
161
|
|
164
|
-
|
162
|
+
== Requirements
|
165
163
|
TagTreeScanner is built on top of the StringScanner library that is part
|
166
164
|
of the standard Ruby installation.
|
167
165
|
|
168
|
-
|
166
|
+
== License
|
169
167
|
|
170
168
|
(The MIT License)
|
171
169
|
|
data/Rakefile
CHANGED
@@ -10,7 +10,7 @@ Hoe.new('tagtreescanner', TagTreeScanner::VERSION) do |p|
|
|
10
10
|
p.email = 'phrogz@mac.com'
|
11
11
|
p.url = ''
|
12
12
|
p.summary = 'Meta library for creating classes that turn custom text markup into XML-like tag hierarchies.'
|
13
|
-
p.description = IO.read( 'README' )[ /= Overview\n(.+?)^=/m, 1 ].rstrip
|
13
|
+
p.description = IO.read( 'README.txt' )[ /= Overview\n(.+?)^=/m, 1 ].rstrip
|
14
14
|
p.changes = IO.read( 'HISTORY' )[ /^=[^\n]+\n+(.+?)^=/m, 1 ].rstrip
|
15
15
|
p.remote_rdoc_dir = ''
|
16
16
|
end
|
data/lib/tagtreescanner.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
# Author:: Gavin Kistner (mailto:phrogz@mac.com)
|
6
6
|
# Copyright:: Copyright (c)2005-2007 Gavin Kistner
|
7
7
|
# License:: MIT License
|
8
|
-
# Version:: 0.8.0
|
8
|
+
# Version:: 0.8.1 (2007-November-25)
|
9
9
|
|
10
10
|
require 'strscan'
|
11
11
|
|
@@ -15,157 +15,9 @@ require 'strscan'
|
|
15
15
|
# example use (and the reason it was written) is to convert a wiki markup
|
16
16
|
# syntax into HTML.
|
17
17
|
#
|
18
|
-
#
|
19
|
-
# require 'TagTreeScanner'
|
20
|
-
#
|
21
|
-
# class SimpleMarkup < TagTreeScanner
|
22
|
-
# @root_factory.allows_text = false
|
23
|
-
#
|
24
|
-
# @tag_genres[ :root ] = [ ]
|
25
|
-
#
|
26
|
-
# @tag_genres[ :root ] << TagFactory.new( :paragraph,
|
27
|
-
# # A line that doesn't have whitespace at the start
|
28
|
-
# :open_match => /(?=\S)/, :open_requires_bol => true,
|
29
|
-
#
|
30
|
-
# # Close when you see a double return
|
31
|
-
# :close_match => /\n[ \t]*\n/,
|
32
|
-
# :allows_text => true,
|
33
|
-
# :allowed_genre => :inline
|
34
|
-
# )
|
35
|
-
#
|
36
|
-
# @tag_genres[ :root ] << TagFactory.new( :preformatted,
|
37
|
-
# # Grab all lines that are indented up until a line that isn't
|
38
|
-
# :open_match => /((\s+).+?)\n+(?=\S)/m, :open_requires_bol => true,
|
39
|
-
# :setup => lambda{ |tag, scanner, tagtree|
|
40
|
-
# # Throw the contents I found into the tag
|
41
|
-
# # but remove leading whitespace
|
42
|
-
# tag << scanner[1].gsub( /^#{scanner[2]}/, '' )
|
43
|
-
# },
|
44
|
-
# :autoclose => :true
|
45
|
-
# )
|
46
|
-
#
|
47
|
-
# @tag_genres[ :inline ] = [ ]
|
48
|
-
#
|
49
|
-
# @tag_genres[ :inline ] << TagFactory.new( :bold,
|
50
|
-
# # An asterisk followed by a letter or number
|
51
|
-
# :open_match => /\*(?=[a-z0-9])/i,
|
52
|
-
#
|
53
|
-
# # Close when I see an asterisk OR a newline coming up
|
54
|
-
# :close_match => /\*|(?=\n)/,
|
55
|
-
# :allows_text => true,
|
56
|
-
# :allowed_genre => :inline
|
57
|
-
# )
|
58
|
-
#
|
59
|
-
# @tag_genres[ :inline ] << TagFactory.new( :italic,
|
60
|
-
# # An underscore followed by a letter or number
|
61
|
-
# :open_match => /_(?=[a-z0-9])/i,
|
62
|
-
#
|
63
|
-
# # Close when I see an underscore OR a newline coming up
|
64
|
-
# :close_match => /_|(?=\n)/,
|
65
|
-
# :allows_text => true,
|
66
|
-
# :allowed_genre => :inline
|
67
|
-
# )
|
68
|
-
# end
|
69
|
-
#
|
70
|
-
# raw_text = <<ENDINPUT
|
71
|
-
# Hello World! You're _soaking in_ my test.
|
72
|
-
# This is a *subset* of markup that I allow.
|
73
|
-
#
|
74
|
-
# Hi paragraph two. Yo! A code sample:
|
75
|
-
#
|
76
|
-
# def foo
|
77
|
-
# puts "Whee!"
|
78
|
-
# end
|
79
|
-
#
|
80
|
-
# _That, as they say, is that._
|
81
|
-
#
|
82
|
-
# ENDINPUT
|
83
|
-
#
|
84
|
-
# markup = SimpleMarkup.new( raw_text ).to_xml
|
85
|
-
# puts markup
|
86
|
-
#
|
87
|
-
#
|
88
|
-
# #=> <paragraph>Hello World! You're <italic>soaking in</italic> my test.
|
89
|
-
# #=> This is a <bold>subset</bold> of markup that I allow.</paragraph>
|
90
|
-
# #=> <paragraph>Hi paragraph two. Yo! A code sample:</paragraph>
|
91
|
-
# #=> <preformatted>def foo
|
92
|
-
# #=> puts "Whee!"
|
93
|
-
# #=> end</preformatted>
|
94
|
-
# #=> <paragraph><italic>That, as they say, is that.</italic></paragraph>
|
95
|
-
#
|
96
|
-
#
|
97
|
-
# = Details
|
98
|
-
#
|
99
|
-
# == TagFactories at 10,000 feet
|
100
|
-
# Each possible output tag is described by a TagFactory, which specifies
|
101
|
-
# some or all of the following:
|
102
|
-
# * The name of the tags it creates <i>(required)</i>
|
103
|
-
# * The regular expression to look for to start the tag
|
104
|
-
# * The regular expression to look for to close the tag, or
|
105
|
-
# * Whether the tag is automatically closed after creation
|
106
|
-
# * What genre of tags are allowed within the tag
|
107
|
-
# * Whether the tag supports raw text inside it
|
108
|
-
# * Code to run when creating a tag
|
109
|
-
#
|
110
|
-
# See the TagFactory class for more information on specifying factories.
|
111
|
-
#
|
112
|
-
# == Genres as a State Machine
|
113
|
-
# As a new tag is opened, the scanner uses the Tag#allowed_genre property
|
114
|
-
# of that tag (set by the +allowed_genre+ property on the TagFactory) to
|
115
|
-
# determine which tags to be looking for. A genre is specified by adding
|
116
|
-
# an array in the <tt>@tag_genres</tt> hash, whose key is the genre name.
|
117
|
-
# For example:
|
118
|
-
# @tag_genres[ :inline ] = [ ]
|
119
|
-
# adds a new genre named 'inline', with no tags in it. TagFactory instances
|
120
|
-
# should be pushed onto this array <b>in the order that they should be looked
|
121
|
-
# for</b>. For example:
|
122
|
-
# @tag_genres[ :inline ] << TagFactory.new( :italic,
|
123
|
-
# # see the TagFactory#initialize for options
|
124
|
-
# )
|
125
|
-
#
|
126
|
-
# Note that the +close_match+ regular expression of the current tag is
|
127
|
-
# always checked before looking to open/create any new tags.
|
128
|
-
#
|
129
|
-
# == Consuming Text
|
130
|
-
# As the text is being parsed, there will (probably) be many cases where
|
131
|
-
# you have raw text that doesn't close or open any new tags. Whenever the
|
132
|
-
# scanner reaches this state, it runs the <tt>@text_match</tt> regexp
|
133
|
-
# against the text to move the pointer ahead. If the current tag has
|
134
|
-
# <tt>Tag#allows_text?</tt> set to +true+ (through
|
135
|
-
# <tt>TagFactory#allows_text</tt>), then this text is added as contents of
|
136
|
-
# the tag. If not, the text is thrown away.
|
137
|
-
#
|
138
|
-
# The safest regular expression consumes only one character at a time:
|
139
|
-
# @text_match = /./m
|
140
|
-
#
|
141
|
-
# <b><i>It is vital that your regexp match newlines</i></b> (the 'm')
|
142
|
-
# <b><i>unless every single one of your tags is set to close upon seeing
|
143
|
-
# a newline.</i></b>
|
144
|
-
#
|
145
|
-
# Unfortunately, the safest regular expression is also the slowest. If
|
146
|
-
# speed is an issue, your regexp should strive to eat as many characters as
|
147
|
-
# possible at once...while ensuring that it doesn't eat characters that
|
148
|
-
# would signify the start of a new tag.
|
149
|
-
#
|
150
|
-
# For example, setting a regexp like:
|
151
|
-
# @text_match = /\w+|./m
|
152
|
-
# allows the scanner to match a whole word at a time. However, if you have
|
153
|
-
# a tag factory set to look for "Hvv2vvO" to indicate a subscripted '2',
|
154
|
-
# the entire string would be eaten as text and the subscript tag would
|
155
|
-
# never start.
|
156
|
-
#
|
157
|
-
# == Using the Scanner
|
158
|
-
# As shown in the example above, consumers of your class initialize it by
|
159
|
-
# passing in the string to be parsed, and then calling #to_xml or #to_html
|
160
|
-
# on it.
|
161
|
-
#
|
162
|
-
# <i>(This two-step process allows the consumer to run other code after
|
163
|
-
# the tag parsing, before final conversion. Examples might include
|
164
|
-
# replacing special command tags with other input, or performing database
|
165
|
-
# lookups on special wiki-page-link tags and replacing with HTML
|
166
|
-
# anchors.)</i>
|
18
|
+
# See the link:README.txt.html file for examples and more information.
|
167
19
|
class TagTreeScanner
|
168
|
-
VERSION = "0.8.0"
|
20
|
+
VERSION = "0.8.1"
|
169
21
|
|
170
22
|
# A TagFactory holds the information about a specific kind of tag:
|
171
23
|
# * the name of the tag
|
@@ -536,8 +388,8 @@ class TagTreeScanner
|
|
536
388
|
end
|
537
389
|
end
|
538
390
|
|
539
|
-
# Set the text content of this element to _new_contents_
|
540
|
-
# Removes any child tags (and their text)
|
391
|
+
# Set the text content of this element to _new_contents_.
|
392
|
+
# Removes any child tags (and their text).
|
541
393
|
def text=( new_contents )
|
542
394
|
@child_tags.clear
|
543
395
|
append_child( TextNode.new( new_contents ) )
|
data/test/test_simplemarkup.rb
CHANGED
@@ -55,30 +55,21 @@ class Tag_Test < Test::Unit::TestCase
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def test_conversion
|
58
|
-
raw_text =
|
59
|
-
|
60
|
-
|
58
|
+
raw_text = <<ENDINPUT
|
59
|
+
Hello World! You're _soaking in_ my test.
|
60
|
+
This is a *subset* of markup that I allow.
|
61
61
|
|
62
|
-
|
62
|
+
Hi paragraph two. Yo! A code sample:
|
63
63
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
_That, as they say, is that._
|
64
|
+
def foo
|
65
|
+
puts "Whee!"
|
66
|
+
end
|
69
67
|
|
70
|
-
|
68
|
+
_That, as they say, is that._
|
69
|
+
ENDINPUT
|
71
70
|
|
72
71
|
markup = SimpleMarkup.new( raw_text ).to_xml
|
73
|
-
|
72
|
+
expected = "<paragraph>Hello World! You're <italic>soaking in</italic> my test.\nThis is a <bold>subset</bold> of markup that I allow.</paragraph>\n<paragraph>Hi paragraph two. Yo! A code sample:</paragraph>\n<preformatted>\ndef foo\n puts \"Whee!\"\nend</preformatted>\n<paragraph><italic>That, as they say, is that.</italic>\n</paragraph>\n"
|
73
|
+
assert_equal( expected, markup )
|
74
74
|
end
|
75
75
|
end
|
76
|
-
|
77
|
-
|
78
|
-
#=> <paragraph>Hello World! You're <italic>soaking in</italic> my test.
|
79
|
-
#=> This is a <bold>subset</bold> of markup that I allow.</paragraph>
|
80
|
-
#=> <paragraph>Hi paragraph two. Yo! A code sample:</paragraph>
|
81
|
-
#=> <preformatted>def foo
|
82
|
-
#=> puts "Whee!"
|
83
|
-
#=> end</preformatted>
|
84
|
-
#=> <paragraph><italic>That, as they say, is that.</italic></paragraph>
|
data/test/test_tagtreescanner.rb
CHANGED
metadata
CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.9.4
|
|
3
3
|
specification_version: 1
|
4
4
|
name: tagtreescanner
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.8.0
|
6
|
+
version: 0.8.1
|
7
7
|
date: 2007-11-25 00:00:00 -07:00
|
8
8
|
summary: Meta library for creating classes that turn custom text markup into XML-like tag hierarchies.
|
9
9
|
require_paths:
|
@@ -31,7 +31,7 @@ authors:
|
|
31
31
|
files:
|
32
32
|
- HISTORY
|
33
33
|
- Manifest.txt
|
34
|
-
- README
|
34
|
+
- README.txt
|
35
35
|
- Rakefile
|
36
36
|
- TODO
|
37
37
|
- lib/tagtreescanner.rb
|
@@ -45,6 +45,7 @@ rdoc_options:
|
|
45
45
|
- README.txt
|
46
46
|
extra_rdoc_files:
|
47
47
|
- Manifest.txt
|
48
|
+
- README.txt
|
48
49
|
executables: []
|
49
50
|
|
50
51
|
extensions: []
|