semantictext 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.rdoc ADDED
@@ -0,0 +1,11 @@
1
+ == CHANGELOG
2
+
3
+ === 0.2.1
4
+ 2009-12-05: added support for multi-level headings e.g. "!!second level heading"
5
+
6
+ === 0.2.0
7
+ 2009-12-03: replaced SemanticText::Link with SemanticText::HTTPTag, SemanticText::MailToTag and SemanticText::FTPTag
8
+ 2009-12-02: renamed SemanticText::Parser to SemanticText::Document
9
+ 2009-12-02: added support for arbitrary tags
10
+ === 0.0.1
11
+
data/README.rdoc CHANGED
@@ -1,10 +1,10 @@
1
- = Semantic Text
1
+ == Semantic Text
2
2
  Semantic Text is a Domain-Specific text markup parser.
3
3
  It takes a file or sequence of lines and returns an object model of the document,
4
4
  including document metadata (e.g. doc creation time and title) and a tree of
5
5
  interconnected objects describing the document structure.
6
6
 
7
- = How to use it
7
+ === How to use it
8
8
  * You need to have added gemcutter.org to your list of gem sources.
9
9
  * Install with:
10
10
  gem install semantictext
@@ -27,8 +27,9 @@ interconnected objects describing the document structure.
27
27
 
28
28
  * *rdoc* http://www.greenbarsoft.co.uk/software/semantictext/rdoc/
29
29
  * *source* http://github.com/dafydd/semantictext
30
+ * To build me, set an environment variable called *SANDBOX* to the directory above your semantictext directory. The tests need this to access test data.
30
31
 
31
- = Semantic Markup
32
+ === Semantic Markup
32
33
  Semantic text supports:
33
34
  * document metadata
34
35
  * section headers
@@ -36,15 +37,13 @@ Semantic text supports:
36
37
  * paragraphs that contain markup tags
37
38
  * inline hyperlinks for http: mailto: and ftp:
38
39
  * markup tags within bullet points
40
+ * custom markup tags e.g. postal code, youtube video embed, ... whatever you define in a subclass of SemanticText::DefaultTagFactory
39
41
 
40
- We intend to support these features in future:
41
- * custom markup tags e.g. postal code, youtube video embed, ...
42
42
 
43
-
44
- = Compatibility
43
+ === Compatibility
45
44
  This project is being developed on OS X. Automated testing for Linux will be included in future releases.
46
45
 
47
- = Licence
46
+ === Licence
48
47
  This is open source software and comes with no warranty. See COPYING for details.
49
48
 
50
49
  http://www.greenbarsoft.co.uk
data/TODO.rdoc CHANGED
@@ -1,7 +1,7 @@
1
1
  ==to do
2
- * support urls as a special structure tag
3
- * tighten up error test cases on ftptag, httptag and mailtotag
4
2
  * improve testing by mocking out tag factory used in tests - consider how/whether to do this
3
+ * tighten up error test cases on ftptag, httptag and mailtotag
4
+ * add verbatim code sections between {{{ and }}} brackets at start of line
5
5
  * support wikinames as a special custom tag
6
6
  * fix path to testfiles so test can run on gem - tests only run on source at the moment
7
7
 
@@ -12,6 +12,10 @@
12
12
  * pull out parsers for different parts and use the state pattern
13
13
 
14
14
  ==done
15
+ * added basic rcov test coverage task to Rakefile
16
+ * find a way to generate demo.txt file from rake
17
+ * make demo.txt be up-to-date as dependency of rdoc
18
+ * replace SemanticText::Link with SemanticText::HTTPTag, SemanticText::MailToTag and SemanticText::FTPTag
15
19
  * support custom structure tags
16
20
  * escape HTML < > and & on headings
17
21
  * build gem from rakefile
data/doc/demo.script ADDED
@@ -0,0 +1,13 @@
1
+ require 'semantictext'
2
+
3
+ p = SemanticText::Document.new
4
+ p.parse 'title: my happy document'
5
+ p.parse ''
6
+ p.parse "!I'm a title"
7
+ p.parse "I'm a paragraph..."
8
+ p.parse "... yep, I'm still that paragraph."
9
+ p.parse ''
10
+ p.parse "* I'm a bullet point"
11
+ p.parse "* I'm another bullet point"
12
+
13
+ puts p.export_html # this is how to export as a HTML fragment
@@ -5,7 +5,7 @@ require 'semantictext/unknown_tag'
5
5
 
6
6
  module SemanticText
7
7
  # I create SemanticText::Tag objects in response to create_tag(name,value) calls
8
- # from a SemanticText::Parser
8
+ # from a SemanticText::Document
9
9
  class DefaultTagFactory
10
10
 
11
11
  def initialize()
@@ -4,7 +4,6 @@ require 'semantictext/keyword_extractor'
4
4
  require 'semantictext/not_header_line'
5
5
  require 'semantictext/paragraph'
6
6
  require 'semantictext/span'
7
- require 'semantictext/link'
8
7
  require 'semantictext/tag'
9
8
  require 'string'
10
9
  require 'semantictext/bulletedlist'
@@ -108,7 +107,7 @@ module SemanticText
108
107
  return
109
108
  end
110
109
  if (line.begins_with('!'))
111
- @content << Heading.new(line[1,line.size-1])
110
+ @content << Heading.parse(line)
112
111
  else
113
112
  if (line.begins_with('*'))
114
113
  if @bulleted_list_parser.nil?
@@ -1,4 +1,5 @@
1
1
  require 'semantictext/tag'
2
+ require 'semantictext/tag_parsing_failed'
2
3
 
3
4
  module SemanticText
4
5
  class FTPTag < Tag
@@ -1,14 +1,20 @@
1
1
  module SemanticText
2
2
  class Heading
3
- attr_reader :text
3
+ attr_reader :text, :depth
4
4
 
5
- def initialize(aTitle)
5
+ def self.parse(line)
6
+ line =~ /(!+)(.*)/
7
+ Heading.new($2,$1.size)
8
+ end
9
+
10
+ def initialize(aTitle, depth)
6
11
  @text = aTitle
12
+ @depth = depth
7
13
  end
8
14
 
9
15
  #export as html
10
16
  def export_html
11
- "\n<h1>#{ CGI.escapeHTML(@text)}</h1>"
17
+ "\n<h#{@depth}>#{ CGI.escapeHTML(@text)}</h#{@depth}>"
12
18
  end
13
19
 
14
20
  end
@@ -9,6 +9,6 @@ module SemanticText
9
9
  @key = key
10
10
  @link = key+':'+value
11
11
  end
12
-
12
+
13
13
  end
14
14
  end
@@ -7,6 +7,7 @@ module SemanticText
7
7
  def initialize(key, value)
8
8
  @text = value
9
9
  @address = @value = value
10
+ @key = key
10
11
  end
11
12
 
12
13
  end
@@ -1,5 +1,4 @@
1
1
  require 'semantictext/span'
2
- require 'semantictext/link'
3
2
  require 'semantictext/tag'
4
3
 
5
4
  require 'string'
@@ -15,7 +14,15 @@ module SemanticText
15
14
  @tag_factory = tag_factory
16
15
  end
17
16
 
18
- private
17
+ private
18
+
19
+ def create_tag(regex, source)
20
+ source =~ regex
21
+ tag_name = $1
22
+ tag_value = $2
23
+ @tag_factory.create_tag(tag_name, tag_value)
24
+ end
25
+
19
26
  def parse_text_for_urls(text, enclosing_element)
20
27
  link_next = false
21
28
  ignore_next_section = false
@@ -29,7 +36,7 @@ module SemanticText
29
36
  ignore_next_section = false
30
37
  else
31
38
  if (link_next)
32
- enclosing_element << Link.new(section)
39
+ enclosing_element << create_tag(/([^:]+):([^\]]*)/, section)
33
40
  ignore_next_section = true
34
41
  else
35
42
  enclosing_element << Span.new(section)
@@ -40,15 +47,14 @@ module SemanticText
40
47
  end
41
48
 
42
49
  public
50
+
51
+ # I parse a line of text, pushing the elements into enclosing_element as I find them.
43
52
  def parse(line, enclosing_element)
44
53
  sections = line.split /(\[[^:]+:[^\]]+\])/
45
54
  tag_next = false
46
55
  sections.each do |section|
47
56
  if (tag_next)
48
- section =~ /\[([^:]+):([^\]]*)\]/
49
- tag_name = $1
50
- tag_value = $2
51
- enclosing_element << @tag_factory.create_tag(tag_name, tag_value)
57
+ enclosing_element << create_tag(/\[([^:]+):([^\]]*)\]/, section)
52
58
  else
53
59
  parse_text_for_urls(section, enclosing_element)
54
60
  end
@@ -9,9 +9,9 @@ module SemanticText
9
9
  @key = key
10
10
  end
11
11
 
12
- #export as html
13
- def export_html
14
- "[#{@key}:#{@text}]"
12
+ # export as html
13
+ def export_html
14
+ "<a href=\"#{key}:#{text}\">#{key}:#{CGI.escapeHTML(text)}</a>"
15
15
  end
16
16
 
17
17
  end
@@ -15,9 +15,9 @@ class BulletTest < Test::Unit::TestCase
15
15
  unit = SemanticText::Bullet.new(test_string, 1, SemanticText::RichTextParser.new(SemanticText::DefaultTagFactory.new))
16
16
  assert_equal(1, unit.depth)
17
17
  assert_element SemanticText::Span, "beginning ", unit.content[0]
18
- assert_element SemanticText::Link, "http://www.example.com", unit.content[1]
18
+ assert_element SemanticText::HTTPTag, "//www.example.com", unit.content[1]
19
19
  assert_element SemanticText::Span, " moretext ", unit.content[2]
20
- assert_element SemanticText::Link, "http://www.dafydd.net", unit.content[3]
20
+ assert_element SemanticText::HTTPTag, "//www.dafydd.net", unit.content[3]
21
21
  assert_element SemanticText::Span, " ending text", unit.content[4]
22
22
  end
23
23
 
@@ -1,7 +1,7 @@
1
1
  require 'test/unit'
2
2
  require 'semantictext/document'
3
3
 
4
- class TestParser < Test::Unit::TestCase
4
+ class TestDocument < Test::Unit::TestCase
5
5
 
6
6
  def assert_element(element_class, text, actual)
7
7
  assert_equal element_class, actual.class
@@ -63,9 +63,9 @@ EOF
63
63
  test_lines.each {|line| unit.parse(line)}
64
64
  result = unit.content[0]
65
65
  assert_equal SemanticText::Paragraph, result.class
66
- assert_element SemanticText::Link, "http://www.dafydd.net/foogoo?blah", result.content[0]
66
+ assert_element SemanticText::HTTPTag, "//www.dafydd.net/foogoo?blah", result.content[0]
67
67
  assert_element SemanticText::Span, " see? ", result.content[1]
68
- assert_element SemanticText::Link, "http://www.example.com", result.content[2]
68
+ assert_element SemanticText::HTTPTag, "//www.example.com", result.content[2]
69
69
  assert_element SemanticText::Span, "I wonder if it worked!", result.content[3]
70
70
  assert_equal 4, result.content.size
71
71
  end
@@ -76,20 +76,20 @@ EOF
76
76
 
77
77
  Embedded link http://www.dafydd.net/foogoo?blah see?
78
78
  I wonder if it worked!
79
- a mailto:foogoo b ftp://asdfasdfasdf c
79
+ a mailto:foogoo b ftp://host/path c
80
80
  EOF
81
81
  test_lines.each {|line| unit.parse(line)}
82
82
 
83
83
  result = unit.content[0]
84
84
  assert_equal SemanticText::Paragraph, result.class
85
85
  assert_element SemanticText::Span, "Embedded link ", result.content[0]
86
- assert_element SemanticText::Link, "http://www.dafydd.net/foogoo?blah", result.content[1]
86
+ assert_element SemanticText::HTTPTag, "//www.dafydd.net/foogoo?blah", result.content[1]
87
87
  assert_element SemanticText::Span, " see?", result.content[2]
88
88
  assert_element SemanticText::Span, "I wonder if it worked!", result.content[3]
89
89
  assert_element SemanticText::Span, 'a ', result.content[4]
90
- assert_element SemanticText::Link, 'mailto:foogoo', result.content[5]
90
+ assert_element SemanticText::MailToTag, 'foogoo', result.content[5]
91
91
  assert_element SemanticText::Span, ' b ', result.content[6]
92
- assert_element SemanticText::Link, 'ftp://asdfasdfasdf', result.content[7]
92
+ assert_element SemanticText::FTPTag, '//host/path', result.content[7]
93
93
  assert_element SemanticText::Span, ' c', result.content[8]
94
94
  assert_equal 9, result.content.size
95
95
  end
@@ -176,6 +176,31 @@ EOF
176
176
  assert_equal 9, result.content.size
177
177
  end
178
178
 
179
+ def test_heading_parsing
180
+ unit = SemanticText::Document.new
181
+ unit.parse ''
182
+ unit.parse '!1st level heading'
183
+ unit.parse '!!2nd level heading'
184
+ unit.parse '!!!3rd level heading'
185
+
186
+ first_heading = unit.content[0]
187
+ second_heading = unit.content[1]
188
+ third_heading = unit.content[2]
189
+
190
+ assert_equal "1st level heading", first_heading.text
191
+ assert_equal SemanticText::Heading, first_heading.class
192
+ assert_equal 1, first_heading.depth
193
+
194
+
195
+ assert_equal "2nd level heading", second_heading.text
196
+ assert_equal SemanticText::Heading, second_heading.class
197
+ assert_equal 2, second_heading.depth
198
+
199
+ assert_equal "3rd level heading", third_heading.text
200
+ assert_equal SemanticText::Heading, third_heading.class
201
+ assert_equal 3, third_heading.depth
202
+ end
203
+
179
204
  def test_paragraphs_headings_and_bullet_points
180
205
  unit = SemanticText::Document.new
181
206
  unit.parse('')
@@ -231,7 +256,7 @@ EOF
231
256
  assert_equal SemanticText::BulletedList, actual_list.class
232
257
 
233
258
  assert_element SemanticText::Span, 'with url ', first_bullet.content[0]
234
- assert_element SemanticText::Link, 'http://www.example.com', first_bullet.content[1]
259
+ assert_element SemanticText::HTTPTag, '//www.example.com', first_bullet.content[1]
235
260
  assert_element SemanticText::Span, ' see?', first_bullet.content[2]
236
261
 
237
262
  assert_element SemanticText::Span, 'with tag ', second_bullet.content[0]
@@ -13,6 +13,8 @@ class TestExport < Test::Unit::TestCase
13
13
  expected = expected_file.readlines
14
14
 
15
15
  (0..(expected.size-1)).each {|index| assert_equal expected[index],actual[index]+"\n"}
16
+
17
+ assert_equal expected.size, actual.size
16
18
  end
17
19
 
18
20
  def test_escaping_paragraphs
@@ -11,6 +11,9 @@ This paragraph tests escaping < > &
11
11
 
12
12
  Theis is the third paragraph.
13
13
 
14
+ !!second-level heading
15
+ !!!third-level heading
16
+
14
17
  Hey dude, check out my website:
15
18
  http://www.example.com Cool innit?
16
19
 
@@ -26,3 +29,7 @@ http://www.example.com Cool innit?
26
29
  This is another paragraph. This is a [http://www.example.com] tag.
27
30
 
28
31
  http://www.example.com/foo?a=b&c=d
32
+
33
+ ftp://host/path
34
+
35
+ mailto:fred@example.com
@@ -3,6 +3,8 @@
3
3
  <p> This is another paragraph.</p>
4
4
  <p> This paragraph tests escaping &lt; &gt; &amp;</p>
5
5
  <p> Theis is the third paragraph.</p>
6
+ <h2>second-level heading</h2>
7
+ <h3>third-level heading</h3>
6
8
  <p> Hey dude, check out my website: <a href="http://www.example.com">http://www.example.com</a> Cool innit?</p>
7
9
  <h1>Second Big Section &lt; &gt; &amp;</h1>
8
10
  <ul><li> point 1</li>
@@ -11,5 +13,7 @@
11
13
  <ul><li> subpoint 2.1</li><li> subpoint 2.2</li>
12
14
  </ul>
13
15
  </ul>
14
- <p> This is another paragraph. This is a [http://www.example.com] tag.</p>
16
+ <p> This is another paragraph. This is a <a href="http://www.example.com">http://www.example.com</a> tag.</p>
15
17
  <p><a href="http://www.example.com/foo?a=b&c=d">http://www.example.com/foo?a=b&amp;c=d</a></p>
18
+ <p><a href="ftp://host/path">ftp://host/path</a></p>
19
+ <p><a href="mailto:fred@example.com">mailto:fred@example.com</a></p>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: semantictext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dafydd Rees
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-12-02 00:00:00 +00:00
12
+ date: 2009-12-05 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -20,11 +20,12 @@ executables: []
20
20
  extensions: []
21
21
 
22
22
  extra_rdoc_files:
23
- - CHANGELOG
23
+ - CHANGELOG.rdoc
24
24
  - COPYING
25
25
  - README.rdoc
26
26
  - TODO.rdoc
27
27
  files:
28
+ - doc/demo.script
28
29
  - lib/semantictext/bullet.rb
29
30
  - lib/semantictext/bulleted_list_parser.rb
30
31
  - lib/semantictext/bulletedlist.rb
@@ -36,7 +37,6 @@ files:
36
37
  - lib/semantictext/heading.rb
37
38
  - lib/semantictext/httptag.rb
38
39
  - lib/semantictext/keyword_extractor.rb
39
- - lib/semantictext/link.rb
40
40
  - lib/semantictext/mailtotag.rb
41
41
  - lib/semantictext/not_header_line.rb
42
42
  - lib/semantictext/paragraph.rb
@@ -47,17 +47,17 @@ files:
47
47
  - lib/semantictext/unknown_tag.rb
48
48
  - lib/semantictext.rb
49
49
  - lib/string.rb
50
- - test/bullet_test.rb
51
- - test/bulleted_list_parser_test.rb
52
- - test/dateextractor_test.rb
53
- - test/default_tag_factory_test.rb
54
- - test/export_test.rb
55
- - test/keywordextractor_test.rb
56
- - test/parser_test.rb
50
+ - test/test_bullet.rb
51
+ - test/test_bulleted_list_parser.rb
52
+ - test/test_dateextractor.rb
53
+ - test/test_default_tag_factory.rb
54
+ - test/test_document.rb
55
+ - test/test_export.rb
56
+ - test/test_keywordextractor.rb
57
57
  - testfiles/complex.art
58
58
  - testfiles/regression-exportsample.txt
59
59
  - testfiles/simple.art
60
- - CHANGELOG
60
+ - CHANGELOG.rdoc
61
61
  - COPYING
62
62
  - README.rdoc
63
63
  - TODO.rdoc
@@ -90,10 +90,10 @@ signing_key:
90
90
  specification_version: 3
91
91
  summary: Domain-Specific text markup parser
92
92
  test_files:
93
- - ./test/bullet_test.rb
94
- - ./test/bulleted_list_parser_test.rb
95
- - ./test/dateextractor_test.rb
96
- - ./test/default_tag_factory_test.rb
97
- - ./test/export_test.rb
98
- - ./test/keywordextractor_test.rb
99
- - ./test/parser_test.rb
93
+ - ./test/test_bullet.rb
94
+ - ./test/test_bulleted_list_parser.rb
95
+ - ./test/test_dateextractor.rb
96
+ - ./test/test_default_tag_factory.rb
97
+ - ./test/test_document.rb
98
+ - ./test/test_export.rb
99
+ - ./test/test_keywordextractor.rb
data/CHANGELOG DELETED
@@ -1,2 +0,0 @@
1
- 2009-12-02: renamed SemanticText::Parser to SemanticText::Document
2
- 2009-12-02: added support for arbitrary tags
@@ -1,9 +0,0 @@
1
- require 'cgi'
2
- module SemanticText
3
- class Link < Span
4
- # export as html
5
- def export_html
6
- "<a href=\"#{text}\">#{CGI.escapeHTML(text)}</a>"
7
- end
8
- end
9
- end