markup_parser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/lib/markup_parser.rb +11 -0
- data/lib/markup_parser/default.rb +109 -0
- data/lib/markup_parser/html.rb +13 -0
- data/lib/markup_parser/markdown.rb +116 -0
- data/lib/markup_parser/rdoc.rb +26 -0
- data/lib/markup_parser/version.rb +3 -0
- data/markup_parser.gemspec +19 -0
- metadata +69 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require "markup_parser/version"
|
2
|
+
|
3
|
+
# Top-level namespace of the gem.
#
# Remembers the directory that holds the individual parser implementations
# and reports the names of the parsers found in it.
module MarkupParser
  puts "\n**************\nMarkupParser loaded\n**************\n"

  class << self
    # Directory scanned by +parsers+ (one +.rb+ file per parser).
    attr_reader :parser_path

    # Points the module at a different parser directory.
    #
    # The memoized result of +parsers+ is dropped so the next call rescans
    # the new location instead of returning a stale list.
    def parser_path=(path)
      @parsers = nil
      @parser_path = path
    end
  end

  # A module body has no implicit writer, so the accessor above has to exist
  # before this assignment runs at load time.
  self.parser_path = File.join(File.dirname(__FILE__), 'markup_parser')

  # Names of the available parsers.
  #
  # Returns the basenames (without +.rb+) of every Ruby file directly inside
  # +parser_path+, excluding "default". The list is computed once and cached.
  def self.parsers
    @parsers ||= Dir.glob(File.join(parser_path, '*.rb')).map { |file| File.basename(file, '.rb') } - ['default']
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
module MarkupParser
  # Base parser. Treats the given text as ready-made HTML (see +parse+) and
  # provides the shared pipeline the markup-specific subclasses build on:
  # text -> +html_text+ -> Nokogiri fragment -> +to_html+.
  #
  # Every stage rescues StandardError, prints a banner to stdout and
  # substitutes a <p class='parse_error'> paragraph instead of raising.
  class Default
    puts "\n**************\nMarkupParser::Default loaded\n**************\n"

    # +html_text+ and +nokoguri_parser+ are public as well, but they are the
    # lazy, memoizing methods defined below rather than plain readers.
    attr_reader :original_text, :lexer_proc

    # text  - the markup source; it is passed through +hot_fixes+ first.
    # lexer - optional block called as lexer.call(code, lang) that returns the
    #         HTML for one code block. Falls back to +default_lexer+.
    def initialize(text = '', &lexer)
      @original_text = hot_fixes(text)
      @lexer_proc = lexer || default_lexer
    end

    # Returns the fully stylized and sanitized HTML.
    #
    # Serializes the Nokogiri fragment as XHTML. On any StandardError the
    # error is reported and the error paragraph is returned instead.
    def to_html
      nokoguri_parser.to_xhtml(:save_with => Nokogiri::XML::Node::SaveOptions::AS_XHTML)
    rescue => e
      report_error("Error in #{self.class}#to_html.", e, 'Putting error message into the output.')
      error_html(e)
    end

    # Instantiates (once) a Nokogiri HTML fragment parser over +html_text+.
    #
    # If building the fragment fails, a fragment containing only the error
    # paragraph is stored and returned instead.
    def nokoguri_parser
      @nokoguri_parser ||= Nokogiri::HTML::DocumentFragment.parse(html_text)
    rescue => e
      report_error("Error in #{self.class}#nokoguri_parser.", e, 'Putting error message into the output.')
      @nokoguri_parser = Nokogiri::HTML::DocumentFragment.parse(error_html(e))
    end

    # Instantiates (once) the html_text via this markup parser.
    #
    # If +parse+ raises, the error paragraph becomes the memoized HTML.
    def html_text
      @html_text ||= parse(@original_text)
    rescue => e
      report_error("Error in #{self.class}#html_text.", e, 'Putting error message into the output.')
      @html_text = error_html(e)
    end

    # Stylizes the code blocks in the html_text.
    # Uses either a passed in lexer Proc or the default_lexer.
    #
    # Every <pre> element carrying a +lang+ attribute is replaced by the
    # lexer's output; <pre> elements without one are left alone. A failure in
    # one block is reported and does not stop the remaining blocks.
    #
    # Returns self so calls can be chained.
    def stylize_code_blocks
      nokoguri_parser.search('pre').each do |node|
        begin
          lang = node['lang']
          next unless lang

          node.replace(@lexer_proc.call(node.inner_text, lang))
        rescue => e
          report_error("Error in parsing <pre lang=''> block.", e, 'Continueing code block parsing.')
        end
      end
      self
    end

    private

    # Default Lexer Proc: hands the code and its language to Uv.parse.
    # NOTE(review): the remaining arguments look like output format, a
    # line-number switch and theme name - confirm against the Uv API.
    def default_lexer
      Proc.new { |code, lang| Uv.parse(code, "xhtml", lang, false, "railscasts") }
    end

    # Prints the shared error banner to stdout.
    def report_error(headline, error, outcome)
      puts "\n******************\n#{headline}\nReason: #{error.message}.\n#{outcome}\n******************\n"
    end

    # HTML paragraph that stands in for output that could not be produced.
    # NOTE(review): the exception message is interpolated without HTML
    # escaping - confirm it can never echo untrusted markup.
    def error_html(error)
      "<p class='parse_error'>Error in parsing in #{self.class}: #{error.message}.</p>"
    end

    ######################################################################
    # TO BE OVERRIDDEN

    # Markup specific Parser invocation. The base class returns the text
    # unchanged.
    def parse(text)
      text
    end

    # Performs hotfixes on a per-markup basis. The base class applies none.
    def hot_fixes(text)
      text
    end

    ######################################################################
  end
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
module MarkupParser
  # Markdown flavour of the parser, rendered through Redcarpet.
  #
  # By default code blocks are rendered by Redcarpet's stock HTML renderer;
  # +stylize_code_blocks+ switches the instance to a renderer that pipes code
  # blocks through Uv instead.
  class Markdown < MarkupParser::Default
    puts "\n**************\nMarkupParser::Markdown loaded\n**************\n"

    # Selected Markdown Options
    OPTIONS = {
      # Parse links even when they are not enclosed in `<>` characters.
      # Autolinks for the http, https and ftp protocols will be automatically
      # detected. Email addresses are also handled, and http links without
      # protocol, but starting with `www.`
      autolink: true,
      # Will stop underscores within words from being treated as the start or
      # end of emphasis blocks and will therefore stop Ruby method or variable
      # names with underscores in them from triggering the emphasis.
      no_intraemphasis: true,
      # HTML blocks do not require to be surrounded by an empty line as in the
      # Markdown standard.
      lax_html_blocks: true,
      # Parse strikethrough, PHP-Markdown style. Two `~` characters mark the
      # start of a strikethrough, e.g. `this is ~~good~~ bad`
      strikethrough: true,
      # Renders fenced code (```) and (~~~)
      fenced_code_blocks: true,
      # Parse tables, PHP-Markdown style
      tables: true
    }.freeze

    # HTML renderer with Ultraviolet Code Lexer
    class UvHtmlRender < Redcarpet::Render::HTML
      puts "\n**************\nMarkupParser::Markdown::UvHtmlRender loaded\n**************\n"

      # Renders one code block through Uv instead of Redcarpet's default.
      # NOTE(review): +language+ may be nil for code blocks without a language
      # tag - confirm how Uv.parse behaves in that case.
      def block_code(code, language)
        Uv.parse(code, "xhtml", language, false, "railscasts")
      end
    end

    # Shared Redcarpet parser whose code blocks are highlighted by Uv.
    # Only built once.
    def self.html_parser_with_code_lexer
      @html_parser_with_code_lexer ||= Redcarpet::Markdown.new(UvHtmlRender, OPTIONS)
    end

    # Shared Redcarpet parser using the stock HTML renderer.
    # Only built once.
    def self.html_parser
      @html_parser ||= Redcarpet::Markdown.new(Redcarpet::Render::HTML, OPTIONS)
    end

    # Returns the fully stylized HTML for this markdown text (memoized).
    # Unlike the base class, rendering errors are not rescued here.
    def html_text
      @html_text ||= parse(@original_text)
    end

    # Sets the parser to the one that includes the code lexer.
    #
    # Anything rendered before the switch is discarded so the next call to
    # +html_text+ / +to_html+ really goes through the highlighting renderer.
    #
    # Returns self so calls can be chained.
    def stylize_code_blocks
      @parser = Markdown.html_parser_with_code_lexer
      @html_text = nil
      @nokoguri_parser = nil
      self
    end

    private

    # The Redcarpet parser used by this instance.
    # Defaults to Markdown.html_parser.
    def parser
      @parser ||= Markdown.html_parser
    end

    # Renders the Markdown text to HTML with this instance's parser.
    def parse(text)
      parser.render(text)
    end

    # Performs my markdown hotfixes.
    #
    # Works on a copy: the String helpers used here modify their receiver in
    # place, and the caller's string (which may be frozen) must stay intact.
    # nil is treated as empty text.
    def hot_fixes(text)
      fixed = text.to_s.dup
      fixed.standardize_newlines!
      fixed.convert_tabs_to_spaces!
      fixed.correct_gh_code_syntax!
      fixed
    end
  end
end
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
# Adds classes to String class...
|
86
|
+
# In-place text clean-ups used by the Markdown parser.
#
# Each helper wraps a single +gsub!+, so it follows the usual bang-method
# convention: it returns the receiver when something was replaced and nil
# when the text was already clean.
class String

  # Corrects the gh code block syntax mistake where one would write '~~~ .ruby'
  # and the correct code should be '~~~ruby'.
  #
  # Only spaces/tabs are accepted between the fence and the dot, so a closing
  # fence followed by a line that starts with a dot is left untouched.
  def correct_gh_code_syntax!
    gsub!(/~~~[ \t]+\.([a-zA-Z]*)/, '~~~\1')
  end

  # Corrects the ol list elements, which only accept the syntax '1. ...'.
  # Corrected syntaxes: '1)'
  #
  # The marker has to open its line (optionally indented); a 'digit)' in the
  # middle of a line, such as 'call(1)', is not a list item and is kept.
  def correct_ol_list_parenth!
    gsub!(/^([ \t]*)(\d+)\)/, '\1\2.')
  end

  # Converts tabs (\t) to 2 spaces
  def convert_tabs_to_spaces!
    gsub!("\t", '  ')
  end

  # Standardize line endings: "\r\n" and lone "\r" both become "\n".
  # Done in one pass so the return value reflects every replacement.
  def standardize_newlines!
    gsub!(/\r\n?/, "\n")
  end

  # Corrects the newlines by stripping the leading spaces of every line that
  # follows a line break.
  # NOTE: this is a hack to workaround the strange Gollum editor indentation behavior
  def sub_newlines!
    gsub!(/(\r\n|\r|\n) +/, '\1')
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'rdoc/markup/to_html'
|
2
|
+
module MarkupParser
  # RDoc flavour of the parser: converts the text with RDoc's ToHtml
  # formatter and otherwise relies on the pipeline of MarkupParser::Default.
  class Rdoc < MarkupParser::Default
    puts "\n**************\nMarkupParser::Rdoc loaded\n**************\n"

    # Memoized formatter shared by all instances.
    #
    # Declared above +private+ on purpose: a bare +private+ does not apply to
    # +def self.+ methods, so this one is public either way.
    # NOTE(review): newer RDoc releases appear to require an options argument
    # for ToHtml.new - confirm against the RDoc version in use.
    def self.parser
      @parser ||= RDoc::Markup::ToHtml.new
    end

    private

    # Returns the fully stylized HTML for this text.
    # Forces UTF-8 encoding since Rdoc seems to return ASCII-8BIT.
    def parse(text)
      Rdoc.parser.convert(text).force_encoding("UTF-8")
    end

    # Performs my rdoc hotfixes (currently none: the text is returned as is).
    def hot_fixes(text)
      text
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/markup_parser/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for markup_parser.
# The packaged files, executables and test files are taken from what git tracks.
Gem::Specification.new do |gem|
  gem.name          = "markup_parser"
  gem.version       = MarkupParser::VERSION
  gem.authors       = ["Quinn"]
  gem.email         = ["chaffeqa@gmail.com"]
  gem.description   = %q{Standardized markup parsers to use a single format: an object. Instantiate a specific markup class with text to output formated Html. Allows for easy code block highlighting using a Proc; defaults to Uv (ruby Ultraviolet)}
  gem.summary       = %q{Standardized markup parsers to a single format. Sole use is for converting markup text to Html. }
  gem.homepage      = "https://github.com/chaffeqa/markup_parser"

  gem.files         = `git ls-files`.split("\n")
  gem.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.require_paths = ["lib"]

  # Runtime libraries the parsers reference: Uv (default code lexer),
  # Nokogiri (HTML fragment handling in Default) and Redcarpet (Markdown).
  # Without the last two an installed gem fails with NameError on first use.
  gem.add_dependency "uv"
  gem.add_dependency "nokogiri"
  gem.add_dependency "redcarpet"
end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: markup_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Quinn
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-09-28 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: uv
|
16
|
+
requirement: &70102402385260 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70102402385260
|
25
|
+
description: ! 'Standardized markup parsers to use a single format: an object. Instantiate
|
26
|
+
a specific markup class with text to output formated Html. Allows for easy code
|
27
|
+
block highlighting using a Proc; defaults to Uv (ruby Ultraviolet)'
|
28
|
+
email:
|
29
|
+
- chaffeqa@gmail.com
|
30
|
+
executables: []
|
31
|
+
extensions: []
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- .gitignore
|
35
|
+
- Gemfile
|
36
|
+
- Rakefile
|
37
|
+
- lib/markup_parser.rb
|
38
|
+
- lib/markup_parser/default.rb
|
39
|
+
- lib/markup_parser/html.rb
|
40
|
+
- lib/markup_parser/markdown.rb
|
41
|
+
- lib/markup_parser/rdoc.rb
|
42
|
+
- lib/markup_parser/version.rb
|
43
|
+
- markup_parser.gemspec
|
44
|
+
homepage: https://github.com/chaffeqa/markup_parser
|
45
|
+
licenses: []
|
46
|
+
post_install_message:
|
47
|
+
rdoc_options: []
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ! '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
requirements: []
|
63
|
+
rubyforge_project:
|
64
|
+
rubygems_version: 1.8.10
|
65
|
+
signing_key:
|
66
|
+
specification_version: 3
|
67
|
+
summary: Standardized markup parsers to a single format. Sole use is for converting
|
68
|
+
markup text to Html.
|
69
|
+
test_files: []
|