tagtreescanner 0.8.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY +4 -0
- data/Manifest.txt +1 -1
- data/{README → README.txt} +12 -14
- data/Rakefile +1 -1
- data/lib/tagtreescanner.rb +5 -153
- data/test/test_simplemarkup.rb +11 -20
- data/test/test_tagtreescanner.rb +1 -1
- metadata +3 -2
data/HISTORY
CHANGED
data/Manifest.txt
CHANGED
data/{README → README.txt}
RENAMED
|
@@ -1,18 +1,16 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
= TagTreeScanner
|
|
3
2
|
Author:: Gavin Kistner (mailto:phrogz@mac.com)
|
|
4
3
|
Copyright:: Copyright (c)2005-2007 Gavin Kistner
|
|
5
4
|
License:: MIT License
|
|
6
|
-
Version:: 0.8.0
|
|
7
|
-
|
|
8
|
-
= Overview
|
|
5
|
+
Version:: 0.8.1 (2007-November-25)
|
|
9
6
|
|
|
7
|
+
== Overview
|
|
10
8
|
The TagTreeScanner class provides a generic framework for creating a
|
|
11
9
|
nested hierarchy of tags and text (like XML or HTML) by parsing text. An
|
|
12
10
|
example use (and the reason it was written) is to convert a wiki markup
|
|
13
11
|
syntax into HTML.
|
|
14
12
|
|
|
15
|
-
|
|
13
|
+
== Example Usage
|
|
16
14
|
require 'tagtreescanner'
|
|
17
15
|
|
|
18
16
|
class SimpleMarkup < TagTreeScanner
|
|
@@ -90,10 +88,10 @@ syntax into HTML.
|
|
|
90
88
|
#=> end</preformatted>
|
|
91
89
|
#=> <paragraph><italic>That, as they say, is that.</italic></paragraph>
|
|
92
90
|
|
|
93
|
-
|
|
91
|
+
== Details
|
|
94
92
|
|
|
95
|
-
|
|
96
|
-
|
|
93
|
+
=== TagFactories at 10,000 feet
|
|
94
|
+
Each possible output tag is described by a TagFactory, which specifies
|
|
97
95
|
some or all of the following:
|
|
98
96
|
* The name of the tags it creates <i>(required)</i>
|
|
99
97
|
* The regular expression to look for to start the tag
|
|
@@ -105,7 +103,7 @@ some or all of the following:
|
|
|
105
103
|
|
|
106
104
|
See the TagFactory class for more information on specifying factories.
|
|
107
105
|
|
|
108
|
-
|
|
106
|
+
=== Genres as a State Machine
|
|
109
107
|
As a new tag is opened, the scanner uses the Tag#allowed_genre property
|
|
110
108
|
of that tag (set by the +allowed_genre+ property on the TagFactory) to
|
|
111
109
|
determine which tags to be looking for. A genre is specified by adding
|
|
@@ -122,7 +120,7 @@ for</b>. For example:
|
|
|
122
120
|
Note that the +close_match+ regular expression of the current tag is
|
|
123
121
|
always checked before looking to open/create any new tags.
|
|
124
122
|
|
|
125
|
-
|
|
123
|
+
=== Consuming Text
|
|
126
124
|
As the text is being parsed, there will (probably) be many cases where
|
|
127
125
|
you have raw text that doesn't close or open any new tags. Whenever the
|
|
128
126
|
scanner reaches this state, it runs the <tt>@text_match</tt> regexp
|
|
@@ -150,7 +148,7 @@ a tag factory set to look for "Hvv2vvO" to indicate a subscripted '2',
|
|
|
150
148
|
the entire string would be eaten as text and the subscript tag would
|
|
151
149
|
never start.
|
|
152
150
|
|
|
153
|
-
|
|
151
|
+
=== Using the Scanner
|
|
154
152
|
As shown in the example above, consumers of your class initialize it by
|
|
155
153
|
passing in the string to be parsed, and then calling #to_xml or #to_html
|
|
156
154
|
on it.
|
|
@@ -161,11 +159,11 @@ replacing special command tags with other input, or performing database
|
|
|
161
159
|
lookups on special wiki-page-link tags and replacing with HTML
|
|
162
160
|
anchors.)</i>
|
|
163
161
|
|
|
164
|
-
|
|
162
|
+
== Requirements
|
|
165
163
|
TagTreeScanner is built on top of the StringScanner library that is part
|
|
166
164
|
of the standard Ruby installation.
|
|
167
165
|
|
|
168
|
-
|
|
166
|
+
== License
|
|
169
167
|
|
|
170
168
|
(The MIT License)
|
|
171
169
|
|
data/Rakefile
CHANGED
|
@@ -10,7 +10,7 @@ Hoe.new('tagtreescanner', TagTreeScanner::VERSION) do |p|
|
|
|
10
10
|
p.email = 'phrogz@mac.com'
|
|
11
11
|
p.url = ''
|
|
12
12
|
p.summary = 'Meta library for creating classes that turn custom text markup into XML-like tag hierarchies.'
|
|
13
|
-
p.description = IO.read( 'README' )[ /= Overview\n(.+?)^=/m, 1 ].rstrip
|
|
13
|
+
p.description = IO.read( 'README.txt' )[ /= Overview\n(.+?)^=/m, 1 ].rstrip
|
|
14
14
|
p.changes = IO.read( 'HISTORY' )[ /^=[^\n]+\n+(.+?)^=/m, 1 ].rstrip
|
|
15
15
|
p.remote_rdoc_dir = ''
|
|
16
16
|
end
|
data/lib/tagtreescanner.rb
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# Author:: Gavin Kistner (mailto:phrogz@mac.com)
|
|
6
6
|
# Copyright:: Copyright (c)2005-2007 Gavin Kistner
|
|
7
7
|
# License:: MIT License
|
|
8
|
-
# Version:: 0.8.0
|
|
8
|
+
# Version:: 0.8.1 (2007-November-25)
|
|
9
9
|
|
|
10
10
|
require 'strscan'
|
|
11
11
|
|
|
@@ -15,157 +15,9 @@ require 'strscan'
|
|
|
15
15
|
# example use (and the reason it was written) is to convert a wiki markup
|
|
16
16
|
# syntax into HTML.
|
|
17
17
|
#
|
|
18
|
-
#
|
|
19
|
-
# require 'TagTreeScanner'
|
|
20
|
-
#
|
|
21
|
-
# class SimpleMarkup < TagTreeScanner
|
|
22
|
-
# @root_factory.allows_text = false
|
|
23
|
-
#
|
|
24
|
-
# @tag_genres[ :root ] = [ ]
|
|
25
|
-
#
|
|
26
|
-
# @tag_genres[ :root ] << TagFactory.new( :paragraph,
|
|
27
|
-
# # A line that doesn't have whitespace at the start
|
|
28
|
-
# :open_match => /(?=\S)/, :open_requires_bol => true,
|
|
29
|
-
#
|
|
30
|
-
# # Close when you see a double return
|
|
31
|
-
# :close_match => /\n[ \t]*\n/,
|
|
32
|
-
# :allows_text => true,
|
|
33
|
-
# :allowed_genre => :inline
|
|
34
|
-
# )
|
|
35
|
-
#
|
|
36
|
-
# @tag_genres[ :root ] << TagFactory.new( :preformatted,
|
|
37
|
-
# # Grab all lines that are indented up until a line that isn't
|
|
38
|
-
# :open_match => /((\s+).+?)\n+(?=\S)/m, :open_requires_bol => true,
|
|
39
|
-
# :setup => lambda{ |tag, scanner, tagtree|
|
|
40
|
-
# # Throw the contents I found into the tag
|
|
41
|
-
# # but remove leading whitespace
|
|
42
|
-
# tag << scanner[1].gsub( /^#{scanner[2]}/, '' )
|
|
43
|
-
# },
|
|
44
|
-
# :autoclose => :true
|
|
45
|
-
# )
|
|
46
|
-
#
|
|
47
|
-
# @tag_genres[ :inline ] = [ ]
|
|
48
|
-
#
|
|
49
|
-
# @tag_genres[ :inline ] << TagFactory.new( :bold,
|
|
50
|
-
# # An asterisk followed by a letter or number
|
|
51
|
-
# :open_match => /\*(?=[a-z0-9])/i,
|
|
52
|
-
#
|
|
53
|
-
# # Close when I see an asterisk OR a newline coming up
|
|
54
|
-
# :close_match => /\*|(?=\n)/,
|
|
55
|
-
# :allows_text => true,
|
|
56
|
-
# :allowed_genre => :inline
|
|
57
|
-
# )
|
|
58
|
-
#
|
|
59
|
-
# @tag_genres[ :inline ] << TagFactory.new( :italic,
|
|
60
|
-
# # An underscore followed by a letter or number
|
|
61
|
-
# :open_match => /_(?=[a-z0-9])/i,
|
|
62
|
-
#
|
|
63
|
-
# # Close when I see an underscore OR a newline coming up
|
|
64
|
-
# :close_match => /_|(?=\n)/,
|
|
65
|
-
# :allows_text => true,
|
|
66
|
-
# :allowed_genre => :inline
|
|
67
|
-
# )
|
|
68
|
-
# end
|
|
69
|
-
#
|
|
70
|
-
# raw_text = <<ENDINPUT
|
|
71
|
-
# Hello World! You're _soaking in_ my test.
|
|
72
|
-
# This is a *subset* of markup that I allow.
|
|
73
|
-
#
|
|
74
|
-
# Hi paragraph two. Yo! A code sample:
|
|
75
|
-
#
|
|
76
|
-
# def foo
|
|
77
|
-
# puts "Whee!"
|
|
78
|
-
# end
|
|
79
|
-
#
|
|
80
|
-
# _That, as they say, is that._
|
|
81
|
-
#
|
|
82
|
-
# ENDINPUT
|
|
83
|
-
#
|
|
84
|
-
# markup = SimpleMarkup.new( raw_text ).to_xml
|
|
85
|
-
# puts markup
|
|
86
|
-
#
|
|
87
|
-
#
|
|
88
|
-
# #=> <paragraph>Hello World! You're <italic>soaking in</italic> my test.
|
|
89
|
-
# #=> This is a <bold>subset</bold> of markup that I allow.</paragraph>
|
|
90
|
-
# #=> <paragraph>Hi paragraph two. Yo! A code sample:</paragraph>
|
|
91
|
-
# #=> <preformatted>def foo
|
|
92
|
-
# #=> puts "Whee!"
|
|
93
|
-
# #=> end</preformatted>
|
|
94
|
-
# #=> <paragraph><italic>That, as they say, is that.</italic></paragraph>
|
|
95
|
-
#
|
|
96
|
-
#
|
|
97
|
-
# = Details
|
|
98
|
-
#
|
|
99
|
-
# == TagFactories at 10,000 feet
|
|
100
|
-
# Each possible output tag is described by a TagFactory, which specifies
|
|
101
|
-
# some or all of the following:
|
|
102
|
-
# * The name of the tags it creates <i>(required)</i>
|
|
103
|
-
# * The regular expression to look for to start the tag
|
|
104
|
-
# * The regular expression to look for to close the tag, or
|
|
105
|
-
# * Whether the tag is automatically closed after creation
|
|
106
|
-
# * What genre of tags are allowed within the tag
|
|
107
|
-
# * Whether the tag supports raw text inside it
|
|
108
|
-
# * Code to run when creating a tag
|
|
109
|
-
#
|
|
110
|
-
# See the TagFactory class for more information on specifying factories.
|
|
111
|
-
#
|
|
112
|
-
# == Genres as a State Machine
|
|
113
|
-
# As a new tag is opened, the scanner uses the Tag#allowed_genre property
|
|
114
|
-
# of that tag (set by the +allowed_genre+ property on the TagFactory) to
|
|
115
|
-
# determine which tags to be looking for. A genre is specified by adding
|
|
116
|
-
# an array in the <tt>@tag_genres</tt> hash, whose key is the genre name.
|
|
117
|
-
# For example:
|
|
118
|
-
# @tag_genres[ :inline ] = [ ]
|
|
119
|
-
# adds a new genre named 'inline', with no tags in it. TagFactory instances
|
|
120
|
-
# should be pushed onto this array <b>in the order that they should be looked
|
|
121
|
-
# for</b>. For example:
|
|
122
|
-
# @tag_genres[ :inline ] << TagFactory.new( :italic,
|
|
123
|
-
# # see the TagFactory#initialize for options
|
|
124
|
-
# )
|
|
125
|
-
#
|
|
126
|
-
# Note that the +close_match+ regular expression of the current tag is
|
|
127
|
-
# always checked before looking to open/create any new tags.
|
|
128
|
-
#
|
|
129
|
-
# == Consuming Text
|
|
130
|
-
# As the text is being parsed, there will (probably) be many cases where
|
|
131
|
-
# you have raw text that doesn't close or open any new tags. Whenever the
|
|
132
|
-
# scanner reaches this state, it runs the <tt>@text_match</tt> regexp
|
|
133
|
-
# against the text to move the pointer ahead. If the current tag has
|
|
134
|
-
# <tt>Tag#allows_text?</tt> set to +true+ (through
|
|
135
|
-
# <tt>TagFactory#allows_text</tt>), then this text is added as contents of
|
|
136
|
-
# the tag. If not, the text is thrown away.
|
|
137
|
-
#
|
|
138
|
-
# The safest regular expression consumes only one character at a time:
|
|
139
|
-
# @text_match = /./m
|
|
140
|
-
#
|
|
141
|
-
# <b><i>It is vital that your regexp match newlines</i></b> (the 'm')
|
|
142
|
-
# <b><i>unless every single one of your tags is set to close upon seeing
|
|
143
|
-
# a newline.</i></b>
|
|
144
|
-
#
|
|
145
|
-
# Unfortunately, the safest regular expression is also the slowest. If
|
|
146
|
-
# speed is an issue, your regexp should strive to eat as many characters as
|
|
147
|
-
# possible at once...while ensuring that it doesn't eat characters that
|
|
148
|
-
# would signify the start of a new tag.
|
|
149
|
-
#
|
|
150
|
-
# For example, setting a regexp like:
|
|
151
|
-
# @text_match = /\w+|./m
|
|
152
|
-
# allows the scanner to match a whole word at a time. However, if you have
|
|
153
|
-
# a tag factory set to look for "Hvv2vvO" to indicate a subscripted '2',
|
|
154
|
-
# the entire string would be eaten as text and the subscript tag would
|
|
155
|
-
# never start.
|
|
156
|
-
#
|
|
157
|
-
# == Using the Scanner
|
|
158
|
-
# As shown in the example above, consumers of your class initialize it by
|
|
159
|
-
# passing in the string to be parsed, and then calling #to_xml or #to_html
|
|
160
|
-
# on it.
|
|
161
|
-
#
|
|
162
|
-
# <i>(This two-step process allows the consumer to run other code after
|
|
163
|
-
# the tag parsing, before final conversion. Examples might include
|
|
164
|
-
# replacing special command tags with other input, or performing database
|
|
165
|
-
# lookups on special wiki-page-link tags and replacing with HTML
|
|
166
|
-
# anchors.)</i>
|
|
18
|
+
# See the link:README.txt.html file for examples and more information.
|
|
167
19
|
class TagTreeScanner
|
|
168
|
-
VERSION = "0.8.0"
|
|
20
|
+
VERSION = "0.8.1"
|
|
169
21
|
|
|
170
22
|
# A TagFactory holds the information about a specific kind of tag:
|
|
171
23
|
# * the name of the tag
|
|
@@ -536,8 +388,8 @@ class TagTreeScanner
|
|
|
536
388
|
end
|
|
537
389
|
end
|
|
538
390
|
|
|
539
|
-
# Set the text content of this element to _new_contents_
|
|
540
|
-
# Removes any child tags (and their text)
|
|
391
|
+
# Set the text content of this element to _new_contents_.
|
|
392
|
+
# Removes any child tags (and their text).
|
|
541
393
|
def text=( new_contents )
|
|
542
394
|
@child_tags.clear
|
|
543
395
|
append_child( TextNode.new( new_contents ) )
|
data/test/test_simplemarkup.rb
CHANGED
|
@@ -55,30 +55,21 @@ class Tag_Test < Test::Unit::TestCase
|
|
|
55
55
|
end
|
|
56
56
|
|
|
57
57
|
def test_conversion
|
|
58
|
-
raw_text =
|
|
59
|
-
|
|
60
|
-
|
|
58
|
+
raw_text = <<ENDINPUT
|
|
59
|
+
Hello World! You're _soaking in_ my test.
|
|
60
|
+
This is a *subset* of markup that I allow.
|
|
61
61
|
|
|
62
|
-
|
|
62
|
+
Hi paragraph two. Yo! A code sample:
|
|
63
63
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
_That, as they say, is that._
|
|
64
|
+
def foo
|
|
65
|
+
puts "Whee!"
|
|
66
|
+
end
|
|
69
67
|
|
|
70
|
-
|
|
68
|
+
_That, as they say, is that._
|
|
69
|
+
ENDINPUT
|
|
71
70
|
|
|
72
71
|
markup = SimpleMarkup.new( raw_text ).to_xml
|
|
73
|
-
|
|
72
|
+
expected = "<paragraph>Hello World! You're <italic>soaking in</italic> my test.\nThis is a <bold>subset</bold> of markup that I allow.</paragraph>\n<paragraph>Hi paragraph two. Yo! A code sample:</paragraph>\n<preformatted>\ndef foo\n puts \"Whee!\"\nend</preformatted>\n<paragraph><italic>That, as they say, is that.</italic>\n</paragraph>\n"
|
|
73
|
+
assert_equal( expected, markup )
|
|
74
74
|
end
|
|
75
75
|
end
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
#=> <paragraph>Hello World! You're <italic>soaking in</italic> my test.
|
|
79
|
-
#=> This is a <bold>subset</bold> of markup that I allow.</paragraph>
|
|
80
|
-
#=> <paragraph>Hi paragraph two. Yo! A code sample:</paragraph>
|
|
81
|
-
#=> <preformatted>def foo
|
|
82
|
-
#=> puts "Whee!"
|
|
83
|
-
#=> end</preformatted>
|
|
84
|
-
#=> <paragraph><italic>That, as they say, is that.</italic></paragraph>
|
data/test/test_tagtreescanner.rb
CHANGED
metadata
CHANGED
|
@@ -3,7 +3,7 @@ rubygems_version: 0.9.4
|
|
|
3
3
|
specification_version: 1
|
|
4
4
|
name: tagtreescanner
|
|
5
5
|
version: !ruby/object:Gem::Version
|
|
6
|
-
version: 0.8.0
|
|
6
|
+
version: 0.8.1
|
|
7
7
|
date: 2007-11-25 00:00:00 -07:00
|
|
8
8
|
summary: Meta library for creating classes that turn custom text markup into XML-like tag hierarchies.
|
|
9
9
|
require_paths:
|
|
@@ -31,7 +31,7 @@ authors:
|
|
|
31
31
|
files:
|
|
32
32
|
- HISTORY
|
|
33
33
|
- Manifest.txt
|
|
34
|
-
- README
|
|
34
|
+
- README.txt
|
|
35
35
|
- Rakefile
|
|
36
36
|
- TODO
|
|
37
37
|
- lib/tagtreescanner.rb
|
|
@@ -45,6 +45,7 @@ rdoc_options:
|
|
|
45
45
|
- README.txt
|
|
46
46
|
extra_rdoc_files:
|
|
47
47
|
- Manifest.txt
|
|
48
|
+
- README.txt
|
|
48
49
|
executables: []
|
|
49
50
|
|
|
50
51
|
extensions: []
|