pink_shirt 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,38 @@
1
class PinkShirt
  # Processor for HTML table markup. Each start_*/end_* method is invoked
  # for the matching opening/closing tag and appends the corresponding
  # Textile table syntax to @output.
  #
  # @output and add_attributes are not defined in this class; presumably both
  # are provided by SAX::Base (TODO confirm against sax/base.rb).
  class SAX::Tables < SAX::Base
    # Tag names this processor is responsible for.
    TAGS = %w(table th tr td).freeze

    # Emits a "table<attributes>." line, but only when add_attributes yields
    # something for the tag; a plain <table> produces no output of its own.
    def start_table(attrs)
      attributes = add_attributes(attrs)
      @output << "table#{attributes}.\n" if attributes
    end

    # Closing </table> needs no Textile output.
    def end_table
    end

    # Opening <tr> needs no Textile output; cells write their own leading "|".
    def start_tr(attrs)
    end

    # Terminates the row with the trailing pipe and a newline.
    def end_tr
      @output << "|\n"
    end

    # Header cells are always written as "|<attributes>_. ".
    def start_th(attrs)
      @output << "|#{add_attributes(attrs)}_. "
    end

    # Closing </th> needs no Textile output.
    def end_th
    end

    # Data cells are written as "|" or "|(someclass). ", never "|.".
    def start_td(attrs)
      attributes = add_attributes(attrs)
      @output << (attributes ? "|#{attributes}. " : "|")
    end

    # Closing </td> needs no Textile output.
    def end_td
    end
  end
end
@@ -0,0 +1,75 @@
1
class PinkShirt
  # SAX inherits from Nokogiri's SAX::Document class.
  # I'd recommend you visit Nokogiri's documentation to learn more.
  #
  # Here's the gist:
  # each time the parser encounters an opening tag it fires a 'start_element' event;
  # each time the parser encounters a closing tag it fires an 'end_element' event.
  #
  # The responsibilities for different tags are split across different classes,
  # but the idea is that a class receives a call to #start_html when <html> is
  # reached and a call to #end_html when </html> is reached; same for all the
  # other tags.
  #
  # Each processor is a class that is responsible for a specific subset of
  # tags, listed in its TAGS constant.
  class SAX < Nokogiri::XML::SAX::Document
    # fail_on_unknown - when true, a tag that no processor lists in its TAGS
    #                   raises StandardError; when false (the default) such
    #                   tags are skipped.
    def initialize(fail_on_unknown=false)
      @fail_on_unknown = fail_on_unknown
      @processors = [
        PinkShirt::SAX::Basic,
        PinkShirt::SAX::BoilerPlate,
        PinkShirt::SAX::Lists,
        PinkShirt::SAX::Links,
        PinkShirt::SAX::Images,
        PinkShirt::SAX::BlockLevel,
        PinkShirt::SAX::Preformatted,
        PinkShirt::SAX::Tables,
        PinkShirt::SAX::Acronym,
        PinkShirt::SAX::Script
      ]
      # Processor instances created so far, keyed by processor class.
      @running = {}
      @output = PinkShirt::Output.new
      @flags = PinkShirt::Flags.new
    end

    # Returns the accumulated output as a string.
    def to_textile
      @output.to_s
    end

    # Finds the processor whose TAGS include +name+, instantiating it on
    # first use and reusing that instance afterwards.
    #
    # Returns nil when no processor handles the tag and fail_on_unknown is
    # false; raises StandardError when fail_on_unknown is true.
    def get_processor(name)
      klass = @processors.find { |processor| processor::TAGS.include?(name) }
      unless klass
        raise StandardError, "unrecognised tag #{name}" if @fail_on_unknown
        return nil
      end
      @running[klass] ||= klass.new(@output, @flags)
    end

    # SAX callback for an opening tag. Dispatches to the processor's
    # start_<name> method with the attributes converted to a Hash.
    # Tags without a processor are skipped instead of calling a method on nil.
    def start_element(name, attrs = [])
      processor = get_processor(name)
      return unless processor

      processor.send("start_#{name}", Hash[attrs])
    end

    # SAX callback for a closing tag. Dispatches to the processor's
    # end_<name> method; tags without a processor are skipped.
    def end_element(name)
      processor = get_processor(name)
      return unless processor

      processor.send("end_#{name}")
    end

    # SAX callback for text content.
    def characters(string)
      plaintext(string)
    end

    # Appends text to the output. While the pre flag is set the text is kept
    # verbatim; otherwise tabs and newlines are removed first.
    def plaintext(string)
      if @flags.pre == true
        @output << string
      else
        @output << string.gsub(/[\t\n]/, "")
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
class PinkShirt
  # Current gem version. Frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
data/lib/pink_shirt.rb ADDED
@@ -0,0 +1,49 @@
1
+ require 'nokogiri'
2
+
3
+ # An html2textile converter
4
+ # PinkShirt Undoes RedCloth
5
+ #
6
+ #
7
+ # Usage
8
+ # ---------------------------------
9
+ # html = "stuff <b>goood</b> stuff"
10
+ # PinkShirt.new(html).to_textile
11
+ #
12
+ # Internals
13
+ # ----------------------------------
14
+ # PinkShirt uses a sax parser built on nokogiri
15
+
16
+
17
class PinkShirt

  # html - the HTML markup (String) to convert to Textile.
  #
  # Builds the SAX handler and wraps it in Nokogiri's HTML SAX parser;
  # nothing is parsed until #to_textile is called.
  def initialize(html)
    @html = html
    @sax_syntax = PinkShirt::SAX.new
    @parser = Nokogiri::HTML::SAX::Parser.new(@sax_syntax)
  end

  # Parses the HTML (with entity replacement disabled) and returns the
  # Textile produced by the SAX handler.
  #
  # The result is memoized: the same handler instance is reused for the
  # lifetime of this object, so parsing again would presumably append a
  # second copy of the document to its output (NOTE(review): confirm against
  # PinkShirt::Output). Repeated calls therefore return the first result.
  def to_textile
    @textile ||= begin
      @parser.parse(@html) do |config|
        config.replace_entities = false
      end
      @sax_syntax.to_textile
    end
  end
end
32
+
33
+ require "pink_shirt/version"
34
+ require 'pink_shirt/sax'
35
+ require 'pink_shirt/flags'
36
+ require 'pink_shirt/output'
37
+ require 'pink_shirt/entities'
38
+ require 'pink_shirt/attributes'
39
+ require 'pink_shirt/sax/base'
40
+ require 'pink_shirt/sax/basic'
41
+ require 'pink_shirt/sax/block_level'
42
+ require 'pink_shirt/sax/preformatted'
43
+ require 'pink_shirt/sax/lists'
44
+ require 'pink_shirt/sax/links'
45
+ require 'pink_shirt/sax/images'
46
+ require 'pink_shirt/sax/acronym'
47
+ require 'pink_shirt/sax/script'
48
+ require 'pink_shirt/sax/tables'
49
+ require 'pink_shirt/sax/boiler_plate'
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "pink_shirt/version"
4
+
5
# Gem metadata for pink_shirt. File lists are taken from git, so the gem
# must be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name        = "pink_shirt"
  s.version     = PinkShirt::VERSION
  s.authors     = ["Graeme Worthy"]
  s.email       = ["graemeworthy@gmail.com"]
  # NOTE(review): homepage is empty -- fill in the project URL if one exists.
  s.homepage    = ""
  s.summary     = %q{An Html to Textile Converter}
  # Removed the stray trailing '' that used to end this sentence.
  s.description = %q{Converts Html to Textile, or as some say 'html2textile', it's built on nokogiri}

  # NOTE(review): RubyForge has been shut down; confirm whether this
  # attribute is still wanted before removing it.
  s.rubyforge_project = "pink_shirt"

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # RedCloth and rspec are only needed to run the specs; nokogiri is needed
  # at runtime.
  s.add_development_dependency "RedCloth"
  s.add_development_dependency "rspec"
  s.add_runtime_dependency "nokogiri"
end
@@ -0,0 +1,16 @@
1
+ require './spec/spec_helper'
2
+
3
# Round-trip check: each fixture's Textile is rendered to HTML by RedCloth,
# converted back by PinkShirt, and must equal the Textile it started from.
describe 'PinkShirt against redcloth in reality' do

  fixtures = YAML::load(File.read('./spec/examples/basics.yaml'))

  fixtures.each do |heading, example|
    it "#{heading} #{example['desc']}" do
      source_textile = example['textile']
      html           = RedCloth.new(source_textile).to_html.chomp
      # Two chomps: drop up to two trailing line breaks from the converted text.
      round_tripped  = PinkShirt.new(html).to_textile.to_s.chomp.chomp
      round_tripped.should == source_textile
    end
  end

end
@@ -0,0 +1,172 @@
1
+ Headings:
2
+ desc: headers
3
+ textile: |-
4
+ h1. This is a Heading 1
5
+
6
+ h2. Heading 2 gets more specific
7
+ html: |-
8
+ <h1>This is a Heading 1</h1>
9
+
10
+ <h2>Heading 2 gets more specific</h2>
11
+
12
+ Paragraphs:
13
+ desc: Paragraphs should all have the p.
14
+ textile: |-
15
+ this is just a normal paragraph
16
+ html: |-
17
+ <p>this is just a normal paragraph</p>
18
+ UL Lists:
19
+ desc: a simple list
20
+ textile: |-
21
+ * 1
22
+ * 2
23
+ html: |-
24
+ <ul>
25
+ <li>1</li>
26
+ <li>2</li>
27
+ </ul>
28
+ OL Lists:
29
+ desc: a simple list
30
+ textile: |-
31
+ # 1
32
+ # 2
33
+ html: |-
34
+ <ol>
35
+ <li>1</li>
36
+ <li>2</li>
37
+ </ol>
38
+ Fonty Markup:
39
+ desc: bold italic and the rest
40
+ textile: |-
41
+ *b*
42
+ *strong*
43
+ ??cite??
44
+ _i_
45
+ _em_
46
+ -del-
47
+ +ins+
48
+ ~sub~
49
+ ^sup^
50
+ %span%
51
+ @code@
52
+ html: |-
53
+ <strong>b</strong><br />
54
+ <strong>strong</strong><br />
55
+ <cite>cite</cite><br />
56
+ <em>i</em><br />
57
+ <em>em</em><br />
58
+ <del>del</del><br />
59
+ <ins>ins</ins><br />
60
+ <sub>sub</sub><br />
61
+ <sup>sup</sup><br />
62
+ <span>span</span><br />
63
+ <code>code</code>
64
+
65
+ Nested Lists:
66
+ desc: list nesting is hard
67
+ textile: |-
68
+ * 1
69
+ * 2
70
+ ** a
71
+ ** b
72
+ ** c
73
+ * 3
74
+ html: |-
75
+ <ul>
76
+ <li>1</li>
77
+ <li>2</li>
78
+ <li>
79
+ <ul>
80
+ <li>a</li>
81
+ <li>b</li>
82
+ <li>c</li>
83
+ </ul>
84
+ </li>
85
+ <li>3</li>
86
+ </ul>
87
+
88
+ Nested Mixed Lists:
89
+ desc: mixed lists are easy
90
+ textile: |-
91
+ # 1
92
+ # 2
93
+ ** a
94
+ ** b
95
+ ** c
96
+ # 3
97
+ html: |-
98
+ <ol>
99
+ <li>1</li>
100
+ <li>2
101
+ <ul>
102
+ <li>a</li>
103
+ <li>b</li>
104
+ <li>c</li>
105
+ </ul>
106
+ <li>3</li>
107
+ </ol>
108
+
109
+ Tables:
110
+ desc: plain Tables
111
+ textile: |-
112
+ |a|table|row|
113
+ |a|table|row|
114
+ html: |-
115
+ <table>
116
+ <tr>
117
+ <td>a</td>
118
+ <td>table</td>
119
+ <td>row</td>
120
+ </tr>
121
+ <tr>
122
+ <td>a</td>
123
+ <td>table</td>
124
+ <td>row</td>
125
+ </tr>
126
+ </table>
127
+
128
+ Tables With headers:
129
+ desc: a table with a header
130
+ textile: |-
131
+ |_. a|_. table|_. header|
132
+ |a|table|row|
133
+ |a|table|row|
134
+ html: |-
135
+ <table>
136
+ <tr>
137
+ <th>a</th>
138
+ <th>table</th>
139
+ <th>header</th>
140
+ </tr>
141
+ <tr>
142
+ <td>a</td>
143
+ <td>table</td>
144
+ <td>row</td>
145
+ </tr>
146
+ <tr>
147
+ <td>a</td>
148
+ <td>table</td>
149
+ <td>row</td>
150
+ </tr>
151
+ </table>
152
+
153
+ Links:
154
+ desc: the backbone of the internet
155
+ textile: |-
156
+ "An Example Page":http://www.example.com
157
+ html: |-
158
+ <a href='http://www.example.com'>An Example Page</a>
159
+
160
+ Images:
161
+ desc: for kittens
162
+ textile: |-
163
+ !http://www.kittenexpress.com!
164
+ html: |-
165
+ <img src='http://www.kittenexpress.com'>
166
+
167
+ Scripts:
168
+ desc: ignore all script tags
169
+ textile: |-
170
+
171
+ html: |-
172
+ <script>pants</script>
@@ -0,0 +1,17 @@
1
+ require './spec/spec_helper'
2
+ require './spec/textile_spec'
3
+
4
# Round-trip check over every example returned by TextileSpec#flat: the
# example's Textile input is rendered to HTML by RedCloth, converted back by
# PinkShirt, and must equal the original input.
describe 'do everything undo it, check it' do
  examples = TextileSpec.new.flat
  examples.each do |example|
    it example['desc'] do
      original_input    = example['input']
      generated_html    = RedCloth.new(original_input).to_html
      generated_textile = PinkShirt.new(generated_html).to_textile

      generated_textile.should == original_input
    end
  end
end
17
+
@@ -0,0 +1,65 @@
1
+ require './spec/spec_helper'
2
+ require './spec/textile_spec'
3
+
4
# Runs PinkShirt against the examples of the Textile specification: for each
# example the spec's HTML output is converted and must equal the spec's
# Textile input.
describe 'the official textile spec' do
  # Example-group label => section name passed to TextileSpec.new.
  sections = {
    "Page Layout"                     => 'Page Layout',
    "Paragraph Text"                  => 'Writing Paragraph Text',
    "Phrase Modifiers"                => 'Phrase modifiers',
    "Attributes"                      => 'Attributes',
    "HTML Integration and Escapement" => 'HTML Integration and Escapement'
  }

  sections.each do |label, section|
    describe label do
      examples = TextileSpec.new(section).flat

      examples.each do |example|
        # The :input metadata now uses the string key like every other
        # access in this file; the former example[:input] lookup presumably
        # always yielded nil (assumes plain string-keyed hashes -- verify
        # against TextileSpec#flat).
        it example['desc'], :input => example['input'] do
          converted = PinkShirt.new(example['output']).to_textile
          expected  = example['input']
          converted.should == expected
        end
      end
    end
  end
end
65
+
@@ -0,0 +1,3 @@
1
+ require './lib/pink_shirt'
2
+ require 'yaml'
3
+ require 'redcloth'
@@ -0,0 +1,6 @@
1
+ == 0.0.1
2
+
3
+ * Initial import of http://redcloth.org/textile examples and conversion
4
+ to YAML. Still needs manual repair.
5
+ * Scrubbed RedCloth references
6
+ * Described format in the README
@@ -0,0 +1,55 @@
1
+ h1. Textile Specifications
2
+
3
+ These are the specifications for the Textile markup language. Implementations (libraries for PHP, Ruby, Python, etc.) can use them in their tests to make sure they are compliant and they are also used to generate the online Textile documentation.
4
+
5
+ h2. Organization
6
+
7
+ The files herein each specify an aspect of the Textile markup language. They are organized from a user's perspective, not according to their implementation. As the specification is developed, please make an effort to keep it abstract, describing Textile the markup language according to what it should do on all platforms. For the most part, leave the testing of what it should not do and bugs it should not have to your platform-specific tests.
8
+
9
+ h2. Format
10
+
11
+ h3. Index
12
+
13
+ Each spec file must be listed in the :specs section of index.yaml. Its file path is relative to the index file.
14
+
15
+ h3. Specs
16
+
17
+ The examples in the specs are grouped into sections. Frequently, the first example in a section repeats the name of its section, in which case the reference manual will include the example without a redundant headline.
18
+
19
+ Example names may be strings or symbols. If the name is a symbol (initial colon and underscores instead of spaces), the example will serve for a test only and will not appear in the Textile reference.
20
+
21
+ Each example should have:
22
+
23
+ * desc - A description of the example--in Textile, of course!
24
+ * input - The Textile code to be passed into a parser
25
+ * output - The expected output from a parser
26
+
27
+ Example:
28
+
29
+ <pre>
30
+ Paragraphs:
31
+ Paragraphs:
32
+ desc: Paragraphs are the default block type.
33
+ input: "This is a paragraph."
34
+ output: "<p>This is a paragraph.</p>"
35
+ Explicit paragraphs:
36
+ desc: They can also be made explicit with [@p. @]
37
+ input: "p. Paragraph"
38
+ output: "<p>Paragraph</p>"
39
+ :capital_p:
40
+ desc: A capital P abbreviation may start the sentence.
41
+ input: "P. T. Barnum was a U.S. Showman"
42
+ output: "<p>P. T. Barnum was a U.S. Showman</p>"
43
+ </pre>
44
+
45
+ h2. Using in your parser tests
46
+
47
+ If you are the maintainer of a Textile parsing library, you can incorporate these specifications into your automated tests. YAML parsing libraries are available for many platforms; you just have to write the code to iterate over the examples.
48
+
49
+ If your project uses git, the specifications can be incorporated as a git submodule for easy tracking of the standard to which your library complies.
50
+
51
+ h2. Development
52
+
53
+ Development of the Textile spec takes place on GitHub. You may fork the spec, make changes and send a pull request. Discussion of examples will take place using GitHub comments on the pull request, commit, file or line in question.
54
+
55
+ All development is to take place on *topic* or *version branches*. Once project admins approve changes and increment the spec version, they will merge it to the *master branch*. This way, the master branch is always in a consistent, "official" state and can be relied upon in implementation tests and reference guides. Experimentation and discussion takes place elsewhere.
@@ -0,0 +1,58 @@
1
+ CSS classes and IDs:
2
+ CSS classes and IDs:
3
+ desc: You can apply CSS classes and IDs to phrase modifiers or block modifiers.
4
+ input: p(my-class). This is a paragraph that has a class and this *(#special-phrase)emphasized
5
+ phrase* has an id.
6
+ output: <p class="my-class">This is a paragraph that has a class and this <strong
7
+ id="special-phrase">emphasized phrase</strong> has an id.</p>
8
+ CSS IDs:
9
+ desc: ''
10
+ input: p(#my-paragraph). This is a paragraph that has an id.
11
+ output: <p id="my-paragraph">This is a paragraph that has an id.</p>
12
+ CSS classes and IDs combined:
13
+ desc: You can specify both class and ID, but the class must always come first.
14
+ input: div(myclass#myid). This div has both a CSS class and ID.
15
+ output: <div class="myclass" id="myid">This div has both a <span class="caps">CSS</span>
16
+ class and ID.</div>
17
+ CSS styles:
18
+ CSS styles:
19
+ desc: Apply CSS styles directly to block or phrase modifiers by putting the
20
+ style rules in curly braces.
21
+ input: p{color:blue;letter-spacing:.5em}. Spacey blue
22
+ output: <p style="color:blue;letter-spacing:.5em;">Spacey blue</p>
23
+ Language:
24
+ Language:
25
+ desc: Specify the language of text with square brackets.
26
+ input: p[fr]. Parlez-vous français ?
27
+ output: <p lang="fr">Parlez-vous français ?</p>
28
+ Alignment:
29
+ Alignment:
30
+ desc: 'Text inside blocks can be aligned in four ways:'
31
+ input: |-
32
+ p<. align left
33
+
34
+ p>. align right
35
+
36
+ p=. centered
37
+
38
+ p<>. justified justified justified justified justified justified justified justified justified justified justified justified justified justified justified justified justified justified
39
+ output: |-
40
+ <p style="text-align:left;">align left</p>
41
+ <p style="text-align:right;">align right</p>
42
+ <p style="text-align:center;">centered</p>
43
+ <p style="text-align:justify;">justified justified justified justified justified justified justified justified justified justified justified justified justified justified justified justified justified justified</p>
44
+ Indentation:
45
+ Indentation:
46
+ desc: Text can be indented with single parentheses. For each left paren, left
47
+ pad 1em. For each right paren, right pad 1em. They may be combined for left
48
+ and right padding.
49
+ input: |-
50
+ p(. Left pad 1em.
51
+
52
+ p)). Right pad 2em.
53
+
54
+ p(). Pad both left and right sides 1em.
55
+ output: |-
56
+ <p style="padding-left:1em;">Left pad 1em.</p>
57
+ <p style="padding-right:2em;">Right pad 2em.</p>
58
+ <p style="padding-left:1em;padding-right:1em;">Pad both left and right sides 1em.</p>