semantictext 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +2 -0
- data/README.rdoc +24 -7
- data/TODO.rdoc +10 -6
- data/lib/semantictext.rb +1 -1
- data/lib/semantictext/bullet.rb +1 -1
- data/lib/semantictext/default_tag_factory.rb +18 -1
- data/lib/semantictext/{parser.rb → document.rb} +7 -4
- data/lib/semantictext/ftptag.rb +21 -0
- data/lib/semantictext/httptag.rb +14 -0
- data/lib/semantictext/mailtotag.rb +13 -0
- data/lib/semantictext/tag.rb +2 -0
- data/lib/semantictext/tag_parsing_failed.rb +4 -0
- data/lib/semantictext/unknown_tag.rb +4 -0
- data/test/default_tag_factory_test.rb +35 -0
- data/test/export_test.rb +6 -6
- data/test/parser_test.rb +21 -21
- data/testfiles/complex.art +1 -1
- data/testfiles/regression-exportsample.txt +1 -1
- metadata +10 -3
data/CHANGELOG
CHANGED
data/README.rdoc
CHANGED
@@ -4,12 +4,29 @@ It takes a file or sequence of lines and returns an object model of the document
|
|
4
4
|
including document metadata (e.g. doc creation time and title) and a tree of
|
5
5
|
interconnected objects describing the document structure.
|
6
6
|
|
7
|
-
== RDOC API
|
8
|
-
The rdoc can be found at http://www.greenbarsoft.co.uk/software/semantictext/rdoc/
|
9
|
-
|
10
7
|
= How to use it
|
11
|
-
*
|
12
|
-
*
|
8
|
+
* You need to have added gemcutter.org to your gem sources.
|
9
|
+
* Install with:
|
10
|
+
gem install semantictext
|
11
|
+
* Parse like this:
|
12
|
+
|
13
|
+
require 'rubygems'
|
14
|
+
require 'semantictext'
|
15
|
+
|
16
|
+
p = SemanticText::Document.new
|
17
|
+
p.parse 'title: my happy document'
|
18
|
+
p.parse ''
|
19
|
+
p.parse "!I'm a title"
|
20
|
+
p.parse "I'm a paragraph..."
|
21
|
+
p.parse "... yep, I'm still that paragraph."
|
22
|
+
p.parse ''
|
23
|
+
p.parse "* I'm a bullet point"
|
24
|
+
p.parse "* I'm another bullet point"
|
25
|
+
|
26
|
+
puts p.export_html # this is how to export as an HTML fragment
|
27
|
+
|
28
|
+
* *rdoc* http://www.greenbarsoft.co.uk/software/semantictext/rdoc/
|
29
|
+
* *source* http://github.com/dafydd/semantictext
|
13
30
|
|
14
31
|
= Semantic Markup
|
15
32
|
Semantic text supports:
|
@@ -24,10 +41,10 @@ We intend to support these features in future:
|
|
24
41
|
* custom markup tags e.g. postal code, youtube video embed, ...
|
25
42
|
|
26
43
|
|
27
|
-
|
44
|
+
= Compatibility
|
28
45
|
This project is being developed on OS X. Automated testing for Linux will be included in future releases.
|
29
46
|
|
30
|
-
|
47
|
+
= Licence
|
31
48
|
This is open source software and comes with no warranty. See COPYING for details.
|
32
49
|
|
33
50
|
http://www.greenbarsoft.co.uk
|
data/TODO.rdoc
CHANGED
@@ -1,19 +1,23 @@
|
|
1
1
|
==to do
|
2
|
-
* support custom structure tags
|
3
|
-
* improve testing by mocking out tag factory used in tests - consider how/whether to do this
|
4
2
|
* support urls as a special structure tag
|
3
|
+
* tighten up error test cases on ftptag, httptag and mailtotag
|
4
|
+
* improve testing by mocking out tag factory used in tests - consider how/whether to do this
|
5
5
|
* support wikinames as a special custom tag
|
6
|
-
*
|
7
|
-
* build gem
|
8
|
-
* test gem
|
9
|
-
* publish gem
|
6
|
+
* fix path to testfiles so test can run on gem - tests only run on source at the moment
|
10
7
|
|
11
8
|
==maybe
|
9
|
+
* consider including section objects that are delimited by headings and that contain all objects in a section
|
12
10
|
* think about how to support twitter with special structure tags e.g. #keyword and @user
|
13
11
|
* refactor parser into header parser and text parser
|
14
12
|
* pull out parsers for different parts and use the state pattern
|
15
13
|
|
16
14
|
==done
|
15
|
+
* support custom structure tags
|
16
|
+
* escape HTML < > and & on headings
|
17
|
+
* build gem from rakefile
|
18
|
+
* publish gem
|
19
|
+
* build gem
|
20
|
+
* publish rdoc
|
17
21
|
* handle proper HTML escaping
|
18
22
|
* remove html and head elements from html output (it's here to be embedded in webpages)
|
19
23
|
* removed surrounding square brackets from text in Tag objects
|
data/lib/semantictext.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
require 'semantictext/
|
1
|
+
require 'semantictext/document'
|
data/lib/semantictext/bullet.rb
CHANGED
@@ -1,9 +1,26 @@
|
|
1
|
+
require 'semantictext/ftptag.rb'
|
2
|
+
require 'semantictext/httptag.rb'
|
3
|
+
require 'semantictext/mailtotag.rb'
|
4
|
+
require 'semantictext/unknown_tag'
|
5
|
+
|
1
6
|
module SemanticText
|
2
7
|
# I create SemanticText::Tag objects in response to create_tag(name,value) calls
|
3
8
|
# from a SemanticText::Parser
|
4
9
|
class DefaultTagFactory
|
10
|
+
|
11
|
+
def initialize()
|
12
|
+
@taglookup = {}
|
13
|
+
@taglookup['http']=HTTPTag
|
14
|
+
@taglookup['ftp']=FTPTag
|
15
|
+
@taglookup['mailto']=MailToTag
|
16
|
+
end
|
17
|
+
|
5
18
|
def create_tag(name, value)
|
6
|
-
|
19
|
+
target = @taglookup[name]
|
20
|
+
if target.nil?
|
21
|
+
throw UnknownTag.new("#{name}:#{value}")
|
22
|
+
end
|
23
|
+
target.new(name,value)
|
7
24
|
end
|
8
25
|
end
|
9
26
|
end
|
@@ -13,8 +13,11 @@ require 'semantictext/bulleted_list_parser'
|
|
13
13
|
require 'semantictext/rich_text_parser'
|
14
14
|
|
15
15
|
module SemanticText
|
16
|
-
|
17
|
-
|
16
|
+
|
17
|
+
#* I am the root node of a graph of objects that form the document.
|
18
|
+
#* I hold metadata.
|
19
|
+
#* I initiate and co-ordinate document-wide operations.
|
20
|
+
class Document
|
18
21
|
# title of the document
|
19
22
|
attr_reader :title
|
20
23
|
|
@@ -30,13 +33,13 @@ module SemanticText
|
|
30
33
|
# the object model of the parsed document
|
31
34
|
attr_reader :content
|
32
35
|
|
33
|
-
def initialize
|
36
|
+
def initialize(tag_factory=DefaultTagFactory.new)
|
34
37
|
@pathname=nil
|
35
38
|
@headers_completed = false
|
36
39
|
@content = []
|
37
40
|
@current_paragraph = nil
|
38
41
|
@bulleted_list_parser = nil
|
39
|
-
@rich_text_parser = RichTextParser.new(
|
42
|
+
@rich_text_parser = RichTextParser.new(tag_factory)
|
40
43
|
end
|
41
44
|
|
42
45
|
# export as html
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'semantictext/tag'
|
2
|
+
|
3
|
+
module SemanticText
|
4
|
+
class FTPTag < Tag
|
5
|
+
attr_reader :host, :path
|
6
|
+
|
7
|
+
def initialize(key, value)
|
8
|
+
@text = value
|
9
|
+
@key = key
|
10
|
+
@path = value
|
11
|
+
parts = value.split '/',4
|
12
|
+
if !(parts.size==4 && parts[0]=='' && parts[1]=='')
|
13
|
+
raise TagParsingFailed.new("Malformed FTP tag \"#{value}\"")
|
14
|
+
end
|
15
|
+
|
16
|
+
@host = parts[2]
|
17
|
+
@path = '/'+parts[3]
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
end
|
data/lib/semantictext/tag.rb
CHANGED
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'semantictext/default_tag_factory'
|
3
|
+
|
4
|
+
class DefaultTagFactoryTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
def setup
|
7
|
+
@factory = SemanticText::DefaultTagFactory.new
|
8
|
+
end
|
9
|
+
|
10
|
+
def test_http
|
11
|
+
tag = @factory.create_tag('http','//www.example.com')
|
12
|
+
assert_equal(SemanticText::HTTPTag, tag.class)
|
13
|
+
assert_equal("http://www.example.com",tag.link)
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_mailto
|
17
|
+
tag = @factory.create_tag('mailto','somebody@example.com')
|
18
|
+
assert_equal(SemanticText::MailToTag, tag.class)
|
19
|
+
assert_equal("somebody@example.com",tag.address)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_ftp
|
23
|
+
tag = @factory.create_tag('ftp','//ftp.funet.fi/pub/standards/RFC/rfc959.txt')
|
24
|
+
assert_equal(SemanticText::FTPTag, tag.class)
|
25
|
+
assert_equal("ftp.funet.fi",tag.host)
|
26
|
+
assert_equal("/pub/standards/RFC/rfc959.txt", tag.path)
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_unregistered_tag
|
30
|
+
assert_throws(:"c2:RecentChanges") {
|
31
|
+
@factory.create_tag('c2','RecentChanges')
|
32
|
+
}
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
data/test/export_test.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
require 'test/unit'
|
2
|
-
require 'semantictext/
|
2
|
+
require 'semantictext/document'
|
3
3
|
|
4
4
|
class TestExport < Test::Unit::TestCase
|
5
5
|
|
6
6
|
def test_end_to_end_loading
|
7
|
-
unit = SemanticText::
|
7
|
+
unit = SemanticText::Document.new
|
8
8
|
unit.parse_from(ENV['SANDBOX']+'/semantictext/testfiles/complex.art')
|
9
9
|
actual = unit.export_html
|
10
10
|
actual = actual.split /\n/
|
@@ -16,7 +16,7 @@ class TestExport < Test::Unit::TestCase
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def test_escaping_paragraphs
|
19
|
-
unit = SemanticText::
|
19
|
+
unit = SemanticText::Document.new
|
20
20
|
unit.parse ''
|
21
21
|
unit.parse 'escaping test < > &'
|
22
22
|
|
@@ -24,7 +24,7 @@ class TestExport < Test::Unit::TestCase
|
|
24
24
|
end
|
25
25
|
|
26
26
|
def test_escaping_headings
|
27
|
-
unit = SemanticText::
|
27
|
+
unit = SemanticText::Document.new
|
28
28
|
unit.parse ''
|
29
29
|
unit.parse '!heading < > &'
|
30
30
|
|
@@ -32,7 +32,7 @@ class TestExport < Test::Unit::TestCase
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def test_escaping_bullet_points
|
35
|
-
unit = SemanticText::
|
35
|
+
unit = SemanticText::Document.new
|
36
36
|
unit.parse ''
|
37
37
|
unit.parse '* < > &'
|
38
38
|
|
@@ -40,7 +40,7 @@ class TestExport < Test::Unit::TestCase
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def test_escaping_link
|
43
|
-
unit = SemanticText::
|
43
|
+
unit = SemanticText::Document.new
|
44
44
|
unit.parse ''
|
45
45
|
unit.parse 'http://www.example.com/app?name1=value1&name2=value2'
|
46
46
|
|
data/test/parser_test.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'test/unit'
|
2
|
-
require 'semantictext/
|
2
|
+
require 'semantictext/document'
|
3
3
|
|
4
4
|
class TestParser < Test::Unit::TestCase
|
5
5
|
|
@@ -9,7 +9,7 @@ class TestParser < Test::Unit::TestCase
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def test_end_to_end_loading
|
12
|
-
unit = SemanticText::
|
12
|
+
unit = SemanticText::Document.new
|
13
13
|
unit.parse_from(ENV['SANDBOX']+'/semantictext/testfiles/simple.art')
|
14
14
|
assert_equal 'test title', unit.title
|
15
15
|
assert_equal 5, unit.createdAt.day
|
@@ -37,7 +37,7 @@ class TestParser < Test::Unit::TestCase
|
|
37
37
|
|
38
38
|
|
39
39
|
def test_headerless_document_parse
|
40
|
-
unit = SemanticText::
|
40
|
+
unit = SemanticText::Document.new
|
41
41
|
test_lines = <<EOF
|
42
42
|
!First Big Heading
|
43
43
|
|
@@ -54,7 +54,7 @@ EOF
|
|
54
54
|
end
|
55
55
|
|
56
56
|
def test_parse_paragraph_beginning_with_url
|
57
|
-
unit = SemanticText::
|
57
|
+
unit = SemanticText::Document.new
|
58
58
|
test_lines = <<EOF
|
59
59
|
|
60
60
|
http://www.dafydd.net/foogoo?blah see? http://www.example.com
|
@@ -71,7 +71,7 @@ EOF
|
|
71
71
|
end
|
72
72
|
|
73
73
|
def test_headerless_document_parse_with_url
|
74
|
-
unit = SemanticText::
|
74
|
+
unit = SemanticText::Document.new
|
75
75
|
test_lines = <<EOF
|
76
76
|
|
77
77
|
Embedded link http://www.dafydd.net/foogoo?blah see?
|
@@ -95,7 +95,7 @@ EOF
|
|
95
95
|
end
|
96
96
|
|
97
97
|
def test_paragraph_parsing
|
98
|
-
unit = SemanticText::
|
98
|
+
unit = SemanticText::Document.new
|
99
99
|
test_lines = <<EOF
|
100
100
|
title:test title
|
101
101
|
createdAt:5 November 2005
|
@@ -122,7 +122,7 @@ EOF
|
|
122
122
|
end
|
123
123
|
|
124
124
|
def test_parsing_of_parameters
|
125
|
-
unit = SemanticText::
|
125
|
+
unit = SemanticText::Document.new
|
126
126
|
unit.parse('title:test title')
|
127
127
|
unit.parse('createdAt:5 November 2005')
|
128
128
|
unit.parse('keywords: buzz, fuzz, muzz')
|
@@ -140,44 +140,44 @@ EOF
|
|
140
140
|
end
|
141
141
|
|
142
142
|
def test_presendence_of_url_lower_than_tag
|
143
|
-
unit = SemanticText::
|
143
|
+
unit = SemanticText::Document.new
|
144
144
|
unit.parse('')
|
145
145
|
unit.parse('Embedded tag [http://www.dafydd.net/foogoo?blah name:here] see?')
|
146
146
|
|
147
147
|
result = unit.content[0]
|
148
148
|
assert_equal SemanticText::Paragraph, result.class
|
149
149
|
assert_element SemanticText::Span, "Embedded tag ", result.content[0]
|
150
|
-
assert_element SemanticText::
|
150
|
+
assert_element SemanticText::HTTPTag, "//www.dafydd.net/foogoo?blah name:here", result.content[1]
|
151
151
|
assert_element SemanticText::Span, " see?", result.content[2]
|
152
152
|
assert_equal 3, result.content.size
|
153
153
|
end
|
154
154
|
|
155
155
|
def test_headerless_document_parse_with_tags
|
156
|
-
unit = SemanticText::
|
156
|
+
unit = SemanticText::Document.new
|
157
157
|
test_lines = <<EOF
|
158
158
|
|
159
|
-
Embedded tag [
|
159
|
+
Embedded tag [mailto:fred@example.com] see?
|
160
160
|
I wonder if it worked!
|
161
|
-
a [
|
161
|
+
a [ftp://ftp.example.com/stuff/download] b [http://www.example.com] c
|
162
162
|
EOF
|
163
163
|
test_lines.each {|line| unit.parse(line)}
|
164
164
|
|
165
165
|
result = unit.content[0]
|
166
166
|
assert_equal SemanticText::Paragraph, result.class
|
167
167
|
assert_element SemanticText::Span, "Embedded tag ", result.content[0]
|
168
|
-
assert_element SemanticText::
|
168
|
+
assert_element SemanticText::MailToTag, "fred@example.com", result.content[1]
|
169
169
|
assert_element SemanticText::Span, " see?", result.content[2]
|
170
170
|
assert_element SemanticText::Span, "I wonder if it worked!", result.content[3]
|
171
171
|
assert_element SemanticText::Span, 'a ', result.content[4]
|
172
|
-
assert_element SemanticText::
|
172
|
+
assert_element SemanticText::FTPTag, '//ftp.example.com/stuff/download', result.content[5]
|
173
173
|
assert_element SemanticText::Span, ' b ', result.content[6]
|
174
|
-
assert_element SemanticText::
|
174
|
+
assert_element SemanticText::HTTPTag, '//www.example.com', result.content[7]
|
175
175
|
assert_element SemanticText::Span, ' c', result.content[8]
|
176
176
|
assert_equal 9, result.content.size
|
177
177
|
end
|
178
178
|
|
179
179
|
def test_paragraphs_headings_and_bullet_points
|
180
|
-
unit = SemanticText::
|
180
|
+
unit = SemanticText::Document.new
|
181
181
|
unit.parse('')
|
182
182
|
unit.parse('!heading')
|
183
183
|
unit.parse('This is a paragraph')
|
@@ -219,10 +219,10 @@ EOF
|
|
219
219
|
end
|
220
220
|
|
221
221
|
def test_bullet_points_with_urls_and_tags
|
222
|
-
unit = SemanticText::
|
222
|
+
unit = SemanticText::Document.new
|
223
223
|
unit.parse('')
|
224
224
|
unit.parse('* with url http://www.example.com see?')
|
225
|
-
unit.parse('* with tag [
|
225
|
+
unit.parse('* with tag [http://www.example.com] see?')
|
226
226
|
|
227
227
|
actual_list = unit.content[0]
|
228
228
|
first_bullet = actual_list.content[0]
|
@@ -235,12 +235,12 @@ EOF
|
|
235
235
|
assert_element SemanticText::Span, ' see?', first_bullet.content[2]
|
236
236
|
|
237
237
|
assert_element SemanticText::Span, 'with tag ', second_bullet.content[0]
|
238
|
-
assert_element SemanticText::
|
238
|
+
assert_element SemanticText::HTTPTag, '//www.example.com', second_bullet.content[1]
|
239
239
|
assert_element SemanticText::Span, ' see?', second_bullet.content[2]
|
240
240
|
end
|
241
241
|
|
242
242
|
def test_bulleted_list_nesting
|
243
|
-
unit = SemanticText::
|
243
|
+
unit = SemanticText::Document.new
|
244
244
|
unit.parse('')
|
245
245
|
unit.parse('* separate list')
|
246
246
|
unit.parse('** nested bullet point 1')
|
@@ -277,7 +277,7 @@ EOF
|
|
277
277
|
end
|
278
278
|
|
279
279
|
def test_bulleted_lsit_parsing_into_two_separate_lists
|
280
|
-
unit = SemanticText::
|
280
|
+
unit = SemanticText::Document.new
|
281
281
|
unit.parse('')
|
282
282
|
unit.parse('* first bullet in first list')
|
283
283
|
unit.parse('')
|
data/testfiles/complex.art
CHANGED
@@ -11,5 +11,5 @@
|
|
11
11
|
<ul><li> subpoint 2.1</li><li> subpoint 2.2</li>
|
12
12
|
</ul>
|
13
13
|
</ul>
|
14
|
-
<p> This is another paragraph. This is a [
|
14
|
+
<p> This is another paragraph. This is a [http://www.example.com] tag.</p>
|
15
15
|
<p><a href="http://www.example.com/foo?a=b&c=d">http://www.example.com/foo?a=b&c=d</a></p>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: semantictext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dafydd Rees
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-12-02 00:00:00 +00:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -30,21 +30,27 @@ files:
|
|
30
30
|
- lib/semantictext/bulletedlist.rb
|
31
31
|
- lib/semantictext/date_extractor.rb
|
32
32
|
- lib/semantictext/default_tag_factory.rb
|
33
|
+
- lib/semantictext/document.rb
|
33
34
|
- lib/semantictext/extraction_failed.rb
|
35
|
+
- lib/semantictext/ftptag.rb
|
34
36
|
- lib/semantictext/heading.rb
|
37
|
+
- lib/semantictext/httptag.rb
|
35
38
|
- lib/semantictext/keyword_extractor.rb
|
36
39
|
- lib/semantictext/link.rb
|
40
|
+
- lib/semantictext/mailtotag.rb
|
37
41
|
- lib/semantictext/not_header_line.rb
|
38
42
|
- lib/semantictext/paragraph.rb
|
39
|
-
- lib/semantictext/parser.rb
|
40
43
|
- lib/semantictext/rich_text_parser.rb
|
41
44
|
- lib/semantictext/span.rb
|
42
45
|
- lib/semantictext/tag.rb
|
46
|
+
- lib/semantictext/tag_parsing_failed.rb
|
47
|
+
- lib/semantictext/unknown_tag.rb
|
43
48
|
- lib/semantictext.rb
|
44
49
|
- lib/string.rb
|
45
50
|
- test/bullet_test.rb
|
46
51
|
- test/bulleted_list_parser_test.rb
|
47
52
|
- test/dateextractor_test.rb
|
53
|
+
- test/default_tag_factory_test.rb
|
48
54
|
- test/export_test.rb
|
49
55
|
- test/keywordextractor_test.rb
|
50
56
|
- test/parser_test.rb
|
@@ -87,6 +93,7 @@ test_files:
|
|
87
93
|
- ./test/bullet_test.rb
|
88
94
|
- ./test/bulleted_list_parser_test.rb
|
89
95
|
- ./test/dateextractor_test.rb
|
96
|
+
- ./test/default_tag_factory_test.rb
|
90
97
|
- ./test/export_test.rb
|
91
98
|
- ./test/keywordextractor_test.rb
|
92
99
|
- ./test/parser_test.rb
|