pink_shirt 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS so that downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in pink_shirt.gemspec
gemspec
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/Readme.txt ADDED
@@ -0,0 +1,26 @@
1
+ I had a problem: some HTML needed to be converted back to Textile.
2
+
3
+ None of the available options could get back to exactly where you started.
4
+ They weren't 'reversible'.
5
+
6
+ I learned a lot by reading the work of others who went before.
7
+ I've looked at: html2textile https://github.com/thickpaddy/html2textile
8
+ I've looked at: clothred https://github.com/CynicalRyan/ClothRed
9
+ I've looked at: a deeply buried module in humpyard https://github.com/humpyard/humpyard
10
+
11
+ This library still has some problems with whitespace management, but all the tags convert.
12
+
13
+ It's based around https://github.com/textile/textile-spec
14
+ and built to produce mirror images of html produced by
15
+ RedCloth https://github.com/jgarber/redcloth
16
+
17
+ Usage
18
+ ============================
19
+ text = "This is *my* text."
20
+ html = RedCloth.new(text).to_html
21
+ same = PinkShirt.new(html).to_textile
22
+ same #=> "This is *my* text."
23
+
24
+
25
+
26
+
@@ -0,0 +1,169 @@
1
class PinkShirt
  # Attributes
  # -------------------------
  # Converts the attributes of one HTML tag into Textile's attribute notation.
  #
  # The attributes may be handed over in any of these shapes:
  #   flat array: ['href', 'http://www.example.com', 'style', 'color:red']
  #   pair array: [['href', 'http://www.example.com'], ['style', 'color:red']]
  #   hash:       {'href' => 'http://www.example.com', 'style' => 'color:red'}
  #   nil:        treated as "no attributes"
  #
  # USAGE
  # ---------------------------
  # Attributes.new(attrs).write   #=> String, or nil when there is nothing to write
  #
  #
  # colspan=2                   => \2
  # rowspan=3                   => /3
  # style='padding-left:1em'    => (
  # style='padding-right:2em'   => ))
  # style='text-align:right'    => >
  # style='text-align:left'     => <
  # style='text-align:center'   => =
  # style='text-align:justify'  => <>
  # class = 'panthers'          => (panthers)
  # id = 'banner'               => (#banner)
  # class='this' id='that'      => (this#that)
  # lang='fr'                   => [fr]
  # style='color:red'           => {color:red}
  #
  # Style rules that have no shorthand (including text-align / padding values
  # outside the table above) are kept and written inside the {...} group.
  class Attributes
    # text-align values that have a Textile shorthand.
    TEXT_ALIGN_MARKS = {
      'left'    => "<",
      'right'   => ">",
      'center'  => "=",
      'justify' => "<>"
    }.freeze

    # padding-left values that have a Textile shorthand.
    PADDING_LEFT_MARKS = { '1em' => "(", '2em' => "((" }.freeze

    # padding-right values that have a Textile shorthand.
    PADDING_RIGHT_MARKS = { '1em' => ")", '2em' => "))" }.freeze

    # attrs - the tag's attributes in any of the shapes listed on the class.
    def initialize(attrs)
      @attrs = attrs
      @attrs_hash = normalize(attrs)
      @styles_hash = parse_styles
      # Order matters: both steal_* calls remove the rules they translate from
      # the styles hash, so whatever is left is what #style writes later.
      @nudges = steal_nudges
      @padding = steal_padding
    end

    # The attributes as a name => value Hash.
    def attrs
      @attrs_hash
    end

    # The parsed style rules that have not been turned into a shorthand.
    def styles
      @styles_hash ||= {}
    end

    # Returns the complete Textile attribute string, or nil when it is empty.
    def write
      out = [colspan, rowspan, nudges, padding, klass_and_id, style, lang].join
      return nil if out == ""
      out
    end

    # Splits the style attribute into a property => value Hash.
    # Returns nil when the tag has no style attribute.
    def parse_styles
      return nil unless attrs['style']
      attrs['style'].split(";").each_with_object({}) do |rule, rules|
        # Split on the first colon only: values such as url(http://...) contain colons.
        property, value = rule.split(":", 2)
        next if value.nil? # blank or malformed rule without a colon
        property = property.strip
        next if property.empty?
        rules[property] = value.strip
      end
    end

    # The remaining style rules serialised as "property:value;property:value".
    def styles_to_s
      styles.map { |property, value| "#{property}:#{value}" }.join(";")
    end

    # Takes a recognised text-align rule out of the styles and returns its
    # shorthand wrapped in an Array ([nil] when there is none).
    # Returns nil when the tag has no style attribute.
    def steal_nudges
      return nil unless attrs['style']
      [steal_style('text-align', TEXT_ALIGN_MARKS)]
    end

    # Takes recognised padding-left / padding-right rules out of the styles and
    # returns their shorthand ("" when there is none).
    # Returns nil when the tag has no style attribute.
    def steal_padding
      return nil unless attrs['style']
      left = steal_style('padding-left', PADDING_LEFT_MARKS)
      right = steal_style('padding-right', PADDING_RIGHT_MARKS)
      "#{left}#{right}"
    end

    # "\N" for colspan=N, nil without a colspan.
    def colspan
      width = attrs['colspan']
      return nil unless width
      "\\#{width}" # literal backslash followed by the width
    end

    # "/N" for rowspan=N, nil without a rowspan.
    def rowspan
      height = attrs['rowspan']
      return nil unless height
      "/#{height}"
    end

    def nudges
      @nudges
    end

    def padding
      @padding
    end

    # "(class)", "(#id)" or "(class#id)"; nil when neither is present.
    def klass_and_id
      klass = attrs['class']
      id = attrs['id']
      return nil unless klass || id
      id_part = id ? "##{id}" : ""
      "(#{klass}#{id_part})"
    end

    # "{property:value;...}" for the style rules without a shorthand, nil if none are left.
    def style
      return nil if styles.empty?
      "{#{styles_to_s}}"
    end

    # "[lang]", nil without a lang attribute.
    def lang
      language = attrs['lang']
      return nil unless language
      "[#{language}]"
    end

    private

    # Builds the name => value Hash from whichever shape the caller passed in.
    def normalize(attrs)
      return {} if attrs.nil?
      return Hash[attrs] if attrs.respond_to?(:to_hash)
      list = attrs.to_a
      list.all? { |item| item.is_a?(Array) } ? Hash[list] : Hash[*list]
    end

    # Returns the shorthand for +property+ and removes the rule from the styles,
    # but only when its value has a shorthand; other values stay in the styles
    # so that they are still written by #style instead of being lost.
    def steal_style(property, marks)
      mark = marks[styles[property]]
      styles.delete(property) if mark
      mark
    end
  end
end
@@ -0,0 +1,32 @@
1
class PinkShirt
  # Swaps smart quotes, dashes and a few other typographic Unicode characters
  # for plain ASCII stand-ins.
  #
  # Usage
  # ---------------------
  # Entities.sanitize(input)
  #
  class Entities
    # Unicode code point => replacement text.
    REPLACEMENTS = {
      169  => "(c)",  # copyright sign
      174  => "(r)",  # registered trademark sign
      215  => "x",    # multiplication sign
      8211 => "-",    # en dash
      8212 => "--",   # em dash
      8216 => "'",    # left single quotation mark
      8217 => "'",    # right single quotation mark
      8220 => '"',    # left double quotation mark
      8221 => '"',    # right double quotation mark
      8230 => "...",  # horizontal ellipsis
      8242 => "'",    # prime (feet / minutes)
      8482 => "(tm)", # trademark sign
    }

    # Returns a copy of +string+ in which every character listed in
    # REPLACEMENTS has been replaced; all other characters pass through.
    def self.sanitize(string)
      code_points = string.unpack("U*").flat_map do |code_point|
        substitute = REPLACEMENTS[code_point]
        substitute ? substitute.unpack('U*') : code_point
      end
      code_points.pack('U*')
    end
  end
end
@@ -0,0 +1,5 @@
1
# Small state holder for passing flags around while a document is processed.
# It is clearly underused: +pre+ is the only flag so far.
class PinkShirt::Flags
  attr_reader :pre
  attr_writer :pre
end
@@ -0,0 +1,64 @@
1
class PinkShirt
  #
  # Nokogiri::Sax builds output as a long stream.
  # Output collects all the writes as an array, and then joins them when required.
  # You can also lock it for writing: while at least one lock is held, writes
  # are silently dropped.
  #
  # Usage
  # ------------------------
  # stream = Output.new
  # stream << 'goods... '
  # stream.inspect #=> ['goods... ']
  # stream.lock('suspicious tag')
  # stream << 'bad stuff'
  # stream.inspect #=> ['goods... ']
  # stream.unlock
  # stream << 'good again'
  # stream.inspect #=> ['goods... ', 'good again']
  #
  class Output
    def initialize
      @contents = []
      @silenced = false
      @locks = []
      # Start with an empty string so that #sanitize and #trim are safe to
      # call even before #join has run.
      @output = ""
    end

    # Stops writes from being recorded until the matching #unlock.
    # key - a label for the lock; it is only stored, never compared.
    def lock(key)
      @locks << key
      @silenced = true
    end

    # Releases the most recent lock; writing resumes once no lock is left.
    def unlock
      @locks.pop
      @silenced = false if @locks.empty?
    end

    # Appends +more+ to the collected writes unless the stream is locked.
    def <<(more)
      @contents << more unless @silenced
    end

    # Concatenates everything written so far into the output string.
    def join
      @output = @contents.join
    end

    # Runs the output string through PinkShirt::Entities.sanitize.
    def sanitize
      @output = PinkShirt::Entities.sanitize(@output)
    end

    # Removes up to three trailing line breaks from the output string.
    def trim
      @output = @output.chomp.chomp.chomp
    end

    # The finished text: joined, sanitized and trimmed.
    def to_s
      join
      sanitize
      trim
      @output
    end

    # Returns the raw list of writes (an Array, not a String).
    def inspect
      @contents
    end
  end
end
@@ -0,0 +1,15 @@
1
class PinkShirt
  # Writes the title of an <acronym> tag in parentheses when the tag closes,
  # e.g. a title of "Always Be Closing" is written as "(Always Be Closing)".
  class SAX::Acronym < SAX::Base
    # %w, not %(): %(acronym) is a plain String, whereas the other processors
    # declare TAGS as an Array of tag names.
    TAGS = %w(acronym)

    def initialize(*args)
      # Stack of titles, so that each closing tag picks up the title of its
      # own opening tag.
      @current_acronym = []
      super
    end

    # Remembers the title (possibly nil) until the tag is closed.
    def start_acronym(attrs)
      @current_acronym << attrs['title']
    end

    # Writes "(title)"; writes nothing when the tag had no title, instead of
    # leaving an empty "()" behind.
    def end_acronym
      title = @current_acronym.pop
      @output << "(#{title})" if title
    end
  end
end
@@ -0,0 +1,20 @@
1
class PinkShirt
  # Common parent of the tag processors. It keeps the shared output and flags
  # objects and answers any message it does not define by doing nothing.
  class SAX::Base
    # Names of the tags a processor deals with; the base class has none.
    TAGS = []

    # input - the object that the processor appends its text to
    # flags - the object used to share flags between processors
    def initialize(input, flags)
      @output, @flags = input, flags
    end

    # Returns the Textile attribute string for +attrs+ (nil when empty).
    def add_attributes(attrs)
      attributes = PinkShirt::Attributes.new(attrs)
      attributes.write
    end

    # Returns the output object itself.
    def to_s
      @output
    end

    # Undefined messages are swallowed: every one of them returns nil.
    def method_missing(*args)
      nil
    end
  end
end
@@ -0,0 +1,114 @@
1
class PinkShirt
  # Phrase-level tags that Textile writes as a pair of marker characters
  # around the text, e.g. <strong>x</strong> becomes *x*.
  class SAX::Basic < SAX::Base
    TAGS = %w(strong b em i cite del ins sub sup span code)

    # Tag name => marker written both when the tag opens and when it closes.
    # <span> is handled separately below.
    MARKS = {
      'strong' => "*",
      'b'      => "**",
      'em'     => "_",
      'i'      => "__",
      'cite'   => "??",
      'del'    => "-",
      'ins'    => "+",
      'sub'    => "~",
      'sup'    => "^",
      'code'   => "@"
    }.freeze

    # True while the innermost open <span> is a class="caps" span, which is
    # written without any marker.
    attr_accessor :nospan

    # Defines start_<tag>(attrs) and end_<tag> for every entry in MARKS.
    MARKS.each do |tag, mark|
      define_method("start_#{tag}") { |attrs| open_phrase(mark, attrs) }
      define_method("end_#{tag}") { @output << mark }
    end

    # redcloth loves to put caps in 'span-caps', it's lame
    # Those spans are dropped; every other span is written as %...%.
    def start_span(attrs)
      caps = attrs['class'] == 'caps'
      # One entry per open span, so that nested spans each close correctly.
      open_spans.push(caps)
      @nospan = caps
      open_phrase("%", attrs) unless caps
    end

    def end_span
      # Without a matching start_span, fall back to the plain nospan flag.
      caps = open_spans.empty? ? @nospan : open_spans.pop
      @output << "%" unless caps
      @nospan = open_spans.last == true
    end

    private

    # Writes the opening marker followed by the tag's attribute string, if any.
    def open_phrase(mark, attrs)
      @output << mark
      attributes = add_attributes(attrs) # computed once per tag
      @output << attributes if attributes
    end

    # Stack with one boolean per open <span>: true for a caps span.
    def open_spans
      @open_spans ||= []
    end
  end
end
@@ -0,0 +1,69 @@
1
class PinkShirt
  # Block-level tags: paragraphs, line breaks, headings, divs and blockquotes.
  class SAX::BlockLevel < SAX::Base
    TAGS = %w(p br h1 h2 h3 h4 h5 h6 div blockquote)

    # A paragraph only gets a "p...". prefix when it carries attributes.
    def start_p(attrs)
      attributes = add_attributes(attrs)
      @output << "p#{attributes}. " if attributes
    end

    def end_p
      @output << "\n\n"
    end

    def start_div(attrs)
      @output << "div#{add_attributes(attrs)}. "
    end

    def end_div
      @output << "\n"
    end

    # With a cite attribute the prefix is "bq.:<cite> " and the other
    # attributes are not written; without one it is "bq<attributes>. ".
    def start_blockquote(attrs)
      cite = attrs['cite']
      @output << (cite ? "bq.:#{cite} " : "bq#{add_attributes(attrs)}. ")
    end

    # Nothing is written when <br> opens ...
    def start_br(attrs)
    end

    # ... the line break is written when it closes.
    def end_br
      @output << "\n"
    end

    # Every heading is closed by a blank line.
    def end_h
      @output << "\n\n"
    end

    # Defines start_h1 .. start_h6 (writing "hN<attributes>. ") and
    # end_h1 .. end_h6 (aliases of end_h).
    %w(h1 h2 h3 h4 h5 h6).each do |heading|
      define_method("start_#{heading}") do |attrs|
        @output << "#{heading}#{add_attributes(attrs)}. "
      end
      alias_method "end_#{heading}", :end_h
    end
  end
end
@@ -0,0 +1,11 @@
1
class PinkShirt
  # Document scaffolding tags. Each handler is defined explicitly and does
  # nothing, so these tags contribute no text to the output.
  class SAX::BoilerPlate < SAX::Base
    TAGS = %w(html body head)

    def start_html(attrs)
    end

    def end_html
    end

    def start_body(attrs)
    end

    def end_body
    end

    def start_head(attrs)
    end

    def end_head
    end
  end
end
@@ -0,0 +1,17 @@
1
class PinkShirt
  # Writes <img> tags as Textile images: !src! or !src(title)!.
  class SAX::Images < SAX::Base
    # %w, not %(): %(img) is a plain String, whereas the other processors
    # declare TAGS as an Array of tag names.
    TAGS = %w(img)

    # attrs - the tag's attributes; reads 'src', 'alt' and 'title'.
    # The title is the first of alt / title that is not blank. An image
    # without a src is skipped, because there is nothing to point at.
    def start_img(attrs)
      src = attrs['src']
      return unless src
      title = [attrs['alt'], attrs['title']].find { |text| text && !text.strip.empty? }
      @output << (title ? "!#{src}(#{title})!" : "!#{src}!")
    end

    # Everything is written when the tag opens; closing adds nothing.
    def end_img
    end
  end
end
@@ -0,0 +1,19 @@
1
class PinkShirt
  # Writes <a> tags as Textile links: "(class). text (title)":href
  class SAX::Links < SAX::Base
    # %w, not %(): %(a) is a plain String, whereas the other processors
    # declare TAGS as an Array of tag names.
    TAGS = %w(a)

    # Writes the opening quote (plus the class, if any) and remembers the
    # attributes for end_a.
    def start_a(attrs)
      @link_info = attrs
      link = '"'
      link += "(#{attrs['class']}). " if attrs['class']
      @output << link
    end

    # Writes the optional title, the closing quote and the href.
    def end_a
      # Guard against a closing tag that was never opened.
      info = @link_info || {}
      title = info['title']
      closing = title ? " (#{title})\"" : '"'
      @output << "#{closing}:#{info['href']}"
    end
  end
end
@@ -0,0 +1,67 @@
1
class PinkShirt
  # Bulleted, numbered and definition lists.
  class SAX::Lists < SAX::Base
    TAGS = %w(ul ol li dl dt dd)

    def initialize(*args)
      @last_depth = 0 # nesting depth seen when the previous <li> closed
      @nesting = []   # currently open lists, innermost last ("ul" / "ol")
      super
    end

    def start_ul(attrs)
      @in_ul = true
      open_list("ul")
    end

    def end_ul
      @nesting.pop
      @in_ul = false
    end

    def start_ol(attrs)
      @in_ol = true
      open_list("ol")
    end

    def end_ol
      @nesting.pop
      @in_ol = false
    end

    # Writes one marker per nesting level: "#" inside <ol>, "*" otherwise.
    def start_li(attrs)
      bullet = @nesting.last == "ol" ? "#" : "*"
      @output << "#{bullet * @nesting.length} "
    end

    # Ends the line, except when a deeper list has just been closed.
    def end_li
      @output << "\n" if @last_depth <= @nesting.length
      @last_depth = @nesting.length
    end

    def start_dl(attrs)
    end

    def end_dl
    end

    def start_dt(attrs)
      @output << "- "
    end

    def end_dt
    end

    def start_dd(attrs)
      @output << " := "
    end

    def end_dd
      @output << "\n"
    end

    private

    # Registers a newly opened list; a nested list starts on a fresh line.
    def open_list(kind)
      @nesting.push(kind)
      @output << "\n" unless @nesting.length == 1
    end
  end
end
@@ -0,0 +1,13 @@
1
class PinkShirt

  # Writes the "pre. " prefix for <pre> blocks and raises the shared +pre+
  # flag for as long as the block is open.
  class SAX::Preformatted < SAX::Base
    # %w, not %(): %(pre) is a plain String, whereas the other processors
    # declare TAGS as an Array of tag names.
    TAGS = %w(pre)

    def start_pre(attrs)
      @flags.pre = true
      @output << "pre#{add_attributes(attrs)}. "
    end

    def end_pre
      @flags.pre = false
    end
  end
end
@@ -0,0 +1,12 @@
1
class PinkShirt
  # One does not textilize the contents of script tags, it is not done.
  # The output is locked when <script> opens and unlocked when it closes.
  class SAX::Script < SAX::Base
    # %w, not %(): %(script) is a plain String, whereas the other processors
    # declare TAGS as an Array of tag names.
    TAGS = %w(script)

    def start_script(attrs)
      @output.lock('script')
    end

    def end_script
      @output.unlock
    end
  end
end