semantictext 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +1 -1
- data/TODO.rdoc +4 -3
- data/doc/demo.script +4 -0
- data/lib/semantictext/document.rb +118 -90
- data/lib/semantictext/verbatim.rb +31 -0
- data/test/test_export.rb +1 -1
- data/testfiles/complex.art +7 -0
- data/testfiles/regression-exportsample.txt +7 -0
- metadata +3 -2
data/README.rdoc
CHANGED
@@ -38,7 +38,7 @@ Semantic text supports:
|
|
38
38
|
* inline hyperlinks for http: mailto: and ftp:
|
39
39
|
* markup tags within bullet points
|
40
40
|
* custom markup tags e.g. postal code, youtube video embed, ... whatever you define in a subclass of SemanticText::DefaultTagFactory
|
41
|
-
|
41
|
+
* verbatim code blocks delimited by lines marked "{{{" and "}}}" respectively
|
42
42
|
|
43
43
|
=== Compatibility
|
44
44
|
This project is being developed on OS X. Automated testing for Linux will be included in future releases.
|
data/TODO.rdoc
CHANGED
@@ -1,17 +1,18 @@
|
|
1
1
|
==to do
|
2
|
-
* improve testing by mocking out tag factory used in tests - consider how/whether to do this
|
3
2
|
* tighten up error test cases on ftptag, httptag and mailtotag
|
4
|
-
*
|
5
|
-
* support wikinames as a special custom tag
|
3
|
+
* improve testing by mocking out tag factory used in tests - consider how/whether to do this
|
6
4
|
* fix path to testfiles so test can run on gem - tests only run on source at the moment
|
7
5
|
|
8
6
|
==maybe
|
7
|
+
* support wikinames as a special custom tag
|
9
8
|
* consider including section objects that are delimited by headings and that contain all objects in a section
|
10
9
|
* think about how to support twitter with special structure tags e.g. #keyword and @user
|
11
10
|
* refactor parser into header parser and text parser
|
12
11
|
* pull out parsers for different parts and use the state pattern
|
13
12
|
|
14
13
|
==done
|
14
|
+
* add verbatim code sections between {{{ and }}} brackets at start of line
|
15
|
+
* added multi-level headings (in release 0.2.1)
|
15
16
|
* added basic rcov test coverage task to Rakefile
|
16
17
|
* find a way to generate demo.txt file from rake
|
17
18
|
* make demo.txt be up-to-date as dependency of rdoc
|
data/doc/demo.script
CHANGED
@@ -7,6 +7,10 @@ p.parse "!I'm a title"
|
|
7
7
|
p.parse "I'm a paragraph..."
|
8
8
|
p.parse "... yep, I'm still that paragraph."
|
9
9
|
p.parse ''
|
10
|
+
p.parse '{{{'
|
11
|
+
p.parse '10 PRINT "HELLO"'
|
12
|
+
p.parse '20 GOTO 10'
|
13
|
+
p.parse '}}}'
|
10
14
|
p.parse "* I'm a bullet point"
|
11
15
|
p.parse "* I'm another bullet point"
|
12
16
|
|
data/lib/semantictext/document.rb
CHANGED
@@ -10,6 +10,7 @@ require 'semantictext/bulletedlist'
|
|
10
10
|
require 'semantictext/bullet'
|
11
11
|
require 'semantictext/bulleted_list_parser'
|
12
12
|
require 'semantictext/rich_text_parser'
|
13
|
+
require 'semantictext/verbatim'
|
13
14
|
|
14
15
|
module SemanticText
|
15
16
|
|
@@ -18,109 +19,136 @@ module SemanticText
|
|
18
19
|
#* I initiate and co-ordinate document-wide operations.
|
19
20
|
class Document
|
20
21
|
# title of the document
|
21
|
-
|
22
|
-
|
23
|
-
# date the document was created
|
24
|
-
attr_reader :createdAt
|
25
|
-
|
26
|
-
# keyword list for the current document
|
27
|
-
attr_reader :keywords
|
28
|
-
|
29
|
-
# pathname of the file currently being parsed (if it exists, nil otherwise)
|
30
|
-
attr_reader :pathname
|
31
|
-
|
32
|
-
# the object model of the parsed document
|
33
|
-
attr_reader :content
|
22
|
+
attr_reader :title
|
34
23
|
|
35
|
-
|
36
|
-
|
37
|
-
@headers_completed = false
|
38
|
-
@content = []
|
39
|
-
@current_paragraph = nil
|
40
|
-
@bulleted_list_parser = nil
|
41
|
-
@rich_text_parser = RichTextParser.new(tag_factory)
|
42
|
-
end
|
24
|
+
# date the document was created
|
25
|
+
attr_reader :createdAt
|
43
26
|
|
44
|
-
#
|
45
|
-
|
46
|
-
out = ""
|
47
|
-
content.each {|element| out=out+element.export_html}
|
48
|
-
out = out + "\n"
|
49
|
-
end
|
27
|
+
# keyword list for the current document
|
28
|
+
attr_reader :keywords
|
50
29
|
|
51
|
-
#
|
52
|
-
|
53
|
-
|
54
|
-
|
30
|
+
# pathname of the file currently being parsed (if it exists, nil otherwise)
|
31
|
+
attr_reader :pathname
|
32
|
+
|
33
|
+
# the object model of the parsed document
|
34
|
+
attr_reader :content
|
35
|
+
|
36
|
+
def initialize(tag_factory=DefaultTagFactory.new)
|
37
|
+
@pathname=nil
|
38
|
+
@headers_completed = false
|
39
|
+
@content = []
|
40
|
+
@current_paragraph = nil
|
41
|
+
@verbatim = nil
|
42
|
+
@bulleted_list_parser = nil
|
43
|
+
@rich_text_parser = RichTextParser.new(tag_factory)
|
44
|
+
end
|
45
|
+
|
46
|
+
# export as html
|
47
|
+
def export_html
|
48
|
+
out = ""
|
49
|
+
content.each {|element| out=out+element.export_html}
|
50
|
+
out = out + "\n"
|
51
|
+
end
|
52
|
+
|
53
|
+
# true iff I have seen the end of the headers section at the top of the document
|
54
|
+
def parameters_complete?
|
55
|
+
@headers_completed
|
56
|
+
end
|
55
57
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
58
|
+
# parse a document into this object from pathname specified by file
|
59
|
+
def parse_from(file)
|
60
|
+
@pathname=file
|
61
|
+
f = File.new(file)
|
62
|
+
f.each_line do |line|
|
63
|
+
parse(line)
|
64
|
+
end
|
65
|
+
f.close
|
66
|
+
end
|
65
67
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
68
|
+
# parse an individual <i>line</i> of String appending content
|
69
|
+
# into the current document held by this object
|
70
|
+
def parse(line)
|
71
|
+
line.chomp!
|
72
|
+
begin
|
73
|
+
if (!@headers_completed)
|
74
|
+
process_header_line(line)
|
75
|
+
else
|
76
|
+
parse_line(line)
|
77
|
+
end
|
76
78
|
rescue NotHeaderLine
|
77
79
|
@headers_completed = true
|
78
80
|
parse_line(line)
|
79
|
-
|
80
|
-
|
81
|
+
end
|
82
|
+
end
|
81
83
|
|
82
|
-
|
84
|
+
private
|
83
85
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
86
|
+
def process_header_line(headerLine)
|
87
|
+
splitLine = headerLine.split(':',2)
|
88
|
+
(attributeName, value) = splitLine
|
89
|
+
raise NotHeaderLine.new() if splitLine.size <2
|
90
|
+
attributeName.strip!
|
91
|
+
@title = value if attributeName=='title'
|
92
|
+
@createdAt = DateExtractor.new.extract_from(value) if attributeName=='createdAt'
|
93
|
+
@keywords = KeywordExtractor.new.extract_from(value) if attributeName=='keywords'
|
94
|
+
end
|
93
95
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
end
|
99
|
-
@rich_text_parser.parse(line, @current_paragraph)
|
96
|
+
def parse_paragraph_line(line)
|
97
|
+
if @current_paragraph.nil?
|
98
|
+
@current_paragraph = Paragraph.new
|
99
|
+
@content << @current_paragraph
|
100
100
|
end
|
101
|
+
@rich_text_parser.parse(line, @current_paragraph)
|
102
|
+
end
|
101
103
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
else
|
112
|
-
if (line.begins_with('*'))
|
113
|
-
if @bulleted_list_parser.nil?
|
114
|
-
@bulleted_list_parser = BulletedListParser.new(@rich_text_parser)
|
115
|
-
@content << @bulleted_list_parser.bulleted_list
|
116
|
-
end
|
117
|
-
@bulleted_list_parser.parse_line(line)
|
118
|
-
else
|
119
|
-
parse_paragraph_line(line)
|
120
|
-
end
|
121
|
-
end
|
122
|
-
end
|
123
|
-
|
104
|
+
# When a bullet point is seen:
|
105
|
+
#* ensure we have a @bulleted_list_parser,
|
106
|
+
#* create one if necessary,
|
107
|
+
#* register its output in the @content.
|
108
|
+
def ensure_bulleted_list_active
|
109
|
+
if @bulleted_list_parser.nil?
|
110
|
+
@bulleted_list_parser = BulletedListParser.new(@rich_text_parser)
|
111
|
+
@content << @bulleted_list_parser.bulleted_list
|
112
|
+
end
|
124
113
|
end
|
114
|
+
|
115
|
+
def ensure_verbatim_active
|
116
|
+
@content << @verbatim = Verbatim.new { @verbatim = nil}
|
117
|
+
end
|
118
|
+
|
119
|
+
def parse_line(line)
|
120
|
+
if @verbatim
|
121
|
+
@verbatim.parse(line)
|
122
|
+
return
|
123
|
+
end
|
124
|
+
|
125
|
+
@bulleted_list_parser = nil unless line.begins_with '*'
|
126
|
+
|
127
|
+
if (line =='')
|
128
|
+
@current_paragraph = nil
|
129
|
+
@bulleted_list = nil
|
130
|
+
return
|
131
|
+
end
|
125
132
|
|
133
|
+
if (line.begins_with('!'))
|
134
|
+
@content << Heading.parse(line)
|
135
|
+
return
|
136
|
+
end
|
137
|
+
|
138
|
+
if (line.begins_with('*'))
|
139
|
+
ensure_bulleted_list_active
|
140
|
+
@bulleted_list_parser.parse_line(line)
|
141
|
+
return
|
142
|
+
end
|
143
|
+
|
144
|
+
if (line == '{{{')
|
145
|
+
ensure_verbatim_active
|
146
|
+
return
|
147
|
+
end
|
148
|
+
|
149
|
+
parse_paragraph_line(line)
|
150
|
+
return
|
151
|
+
end
|
152
|
+
|
153
|
+
end
|
126
154
|
end
|
data/lib/semantictext/verbatim.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
module SemanticText
|
3
|
+
|
4
|
+
class Verbatim
|
5
|
+
attr_reader :lines
|
6
|
+
|
7
|
+
@end_of_section_block = {}
|
8
|
+
|
9
|
+
def initialize(&end_of_section_handler)
|
10
|
+
@lines= []
|
11
|
+
@end_of_section_block = end_of_section_handler
|
12
|
+
end
|
13
|
+
|
14
|
+
#Parse line of text into a verbatim block.
|
15
|
+
#Runs end of verbatim handler when end of verbatim found.
|
16
|
+
def parse(line)
|
17
|
+
if line=='}}}'
|
18
|
+
@end_of_section_block.call
|
19
|
+
else
|
20
|
+
@lines << line
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def export_html
|
25
|
+
out = "\n<pre>\n"
|
26
|
+
lines.each {|line | out += CGI.escapeHTML(line)+"\n" }
|
27
|
+
out += "</pre>\n"
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
end
|
data/test/test_export.rb
CHANGED
@@ -11,7 +11,7 @@ class TestExport < Test::Unit::TestCase
|
|
11
11
|
|
12
12
|
expected_file=File.new(ENV['SANDBOX']+'/semantictext/testfiles/regression-exportsample.txt')
|
13
13
|
expected = expected_file.readlines
|
14
|
-
|
14
|
+
|
15
15
|
(0..(expected.size-1)).each {|index| assert_equal expected[index],actual[index]+"\n"}
|
16
16
|
|
17
17
|
assert_equal expected.size, actual.size
|
data/testfiles/complex.art
CHANGED
@@ -2,6 +2,13 @@
|
|
2
2
|
<h1>First Big Heading</h1>
|
3
3
|
<p> This is another paragraph.</p>
|
4
4
|
<p> This paragraph tests escaping < > &</p>
|
5
|
+
<pre>
|
6
|
+
verbatim block line 1 < > &
|
7
|
+
verbatim block line 2
|
8
|
+
|
9
|
+
verbatim block line 4
|
10
|
+
</pre>
|
11
|
+
|
5
12
|
<p> Theis is the third paragraph.</p>
|
6
13
|
<h2>second-level heading</h2>
|
7
14
|
<h3>third-level heading</h3>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: semantictext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dafydd Rees
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-06 00:00:00 +00:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -45,6 +45,7 @@ files:
|
|
45
45
|
- lib/semantictext/tag.rb
|
46
46
|
- lib/semantictext/tag_parsing_failed.rb
|
47
47
|
- lib/semantictext/unknown_tag.rb
|
48
|
+
- lib/semantictext/verbatim.rb
|
48
49
|
- lib/semantictext.rb
|
49
50
|
- lib/string.rb
|
50
51
|
- test/test_bullet.rb
|