markup_parser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/Rakefile +2 -0
- data/lib/markup_parser.rb +11 -0
- data/lib/markup_parser/default.rb +109 -0
- data/lib/markup_parser/html.rb +13 -0
- data/lib/markup_parser/markdown.rb +116 -0
- data/lib/markup_parser/rdoc.rb +26 -0
- data/lib/markup_parser/version.rb +3 -0
- data/markup_parser.gemspec +19 -0
- metadata +69 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require "markup_parser/version"
|
2
|
+
|
3
|
+
# Top-level namespace of the gem. Besides hosting the parser classes it knows
# where the parser implementation files live and can list them by name.
module MarkupParser
  puts "\n**************\nMarkupParser loaded\n**************\n"

  # Files in the parser directory that are support code rather than parsers.
  NON_PARSER_FILES = ["default", "version"].freeze

  class << self
    # Directory that is scanned for parser implementation files (*.rb).
    attr_reader :parser_path
  end

  # Points the module at another parser directory.
  # The memoized parser list is dropped so the next call to +parsers+
  # reflects the new location instead of returning a stale list.
  def self.parser_path=(path)
    @parsers = nil
    @parser_path = path
  end

  # The original code assigned through an undefined setter, which raised
  # NoMethodError at load time; the setter above makes this line valid.
  self.parser_path = File.join(File.dirname(__FILE__), 'markup_parser')

  # Returns the sorted base names (without ".rb") of the parser files found
  # in +parser_path+, excluding the base class and the version file.
  def self.parsers
    @parsers ||= Dir.glob(File.join(parser_path, '*.rb'))
                    .map { |file| File.basename(file, '.rb') }
                    .sort - NON_PARSER_FILES
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'erb' # ERB::Util.html_escape is used to escape exception text placed into HTML

module MarkupParser
  # Base class for all markup parsers.
  #
  # Holds the source text, converts it to HTML through +parse+ (overridden by
  # subclasses), wraps that HTML in a Nokogiri fragment and can run a lexer
  # over <pre lang="..."> blocks. Every failure is reported on stdout and
  # turned into a <p class='parse_error'> paragraph instead of propagating.
  class Default
    puts "\n**************\nMarkupParser::Default loaded\n**************\n"

    # +html_text+ and +nokoguri_parser+ are defined explicitly below, so no
    # attr_reader is generated for them (that only caused redefinition warnings).
    attr_reader :original_text, :lexer_proc

    # text  - markup source; nil is treated as an empty string.
    # lexer - optional block called as lexer.call(code, lang) that returns the
    #         highlighted HTML for one code block. Defaults to +default_lexer+.
    def initialize(text = '', &lexer)
      @original_text = hot_fixes(text.to_s)
      @lexer_proc = lexer || default_lexer
    end

    # Returns the document serialized as XHTML, or an error paragraph when
    # serialization fails.
    def to_html
      nokoguri_parser.to_xhtml(:save_with => Nokogiri::XML::Node::SaveOptions::AS_XHTML)
    rescue => e
      report_error("Error in #{self.class}#to_html.", e, "Putting error message into the output.")
      error_markup(e)
    end

    # Memoized Nokogiri HTML fragment built from +html_text+.
    # On failure the fragment is built from the error paragraph instead.
    def nokoguri_parser
      @nokoguri_parser ||= Nokogiri::HTML::DocumentFragment.parse(html_text)
    rescue => e
      report_error("Error in #{self.class}#nokoguri_parser.", e, "Putting error message into the output.")
      @nokoguri_parser = Nokogiri::HTML::DocumentFragment.parse(error_markup(e))
    end

    # Memoized HTML produced by +parse+ from the original text.
    # On failure the error paragraph becomes the HTML text.
    def html_text
      @html_text ||= parse(@original_text)
    rescue => e
      report_error("Error in #{self.class}#html_text.", e, "Putting error message into the output.")
      @html_text = error_markup(e)
    end

    # Replaces every <pre> node that carries a +lang+ attribute with the
    # output of the lexer proc. A failing block is reported and skipped so
    # the remaining blocks are still processed. Returns self for chaining.
    def stylize_code_blocks
      nokoguri_parser.search('pre').each do |node|
        begin
          lang = node['lang']
          next unless lang

          node.replace(@lexer_proc.call(node.inner_text, lang))
        rescue => e
          report_error("Error in parsing <pre lang=''> block.", e, "Continueing code block parsing.")
        end
      end
      self
    end

    private

    # Default lexer proc: highlights through Uv as xhtml with the
    # "railscasts" theme.
    def default_lexer
      Proc.new { |code, lang| Uv.parse(code, "xhtml", lang, false, "railscasts") }
    end

    # Prints the framed diagnostic banner shared by all rescue branches.
    def report_error(headline, error, outcome)
      puts "\n\n******************\n#{headline}\nReason: #{error.message}.\n#{outcome}\n******************\n"
    end

    # Builds the paragraph that stands in for output that could not be
    # produced. The message is HTML-escaped because it may echo fragments of
    # the parsed text and must not be interpreted as markup.
    def error_markup(error)
      "<p class='parse_error'>Error in parsing in #{self.class}: #{ERB::Util.html_escape(error.message)}.</p>"
    end

    ######################################################################
    # TO BE OVERRIDDEN

    # Markup specific parser invocation. The base class returns the text as is.
    def parse(text)
      text
    end

    # Performs hotfixes on a per-markup basis. The base class applies none.
    def hot_fixes(text)
      text
    end

    ######################################################################
  end
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
module MarkupParser
  # Markdown parser backed by Redcarpet.
  #
  # By default code blocks are rendered by Redcarpet's stock HTML renderer;
  # after +stylize_code_blocks+ they are rendered through Uv instead.
  class Markdown < MarkupParser::Default
    puts "\n**************\nMarkupParser::Markdown loaded\n**************\n"

    # Selected Markdown options, shared by both Redcarpet parsers.
    OPTIONS = {
      # Parse links even when they are not enclosed in `<>` characters.
      # Autolinks for the http, https and ftp protocols will be automatically
      # detected. Email addresses are also handled, and http links without
      # protocol, but starting with `www.`
      autolink: true,
      # Stop underscores within words from being treated as the start or end
      # of emphasis blocks, so Ruby method or variable names with underscores
      # do not trigger emphasis.
      no_intraemphasis: true,
      # NOTE(review): Redcarpet releases since 2.0 document this option as
      # `no_intra_emphasis`; both spellings are passed so either is honoured.
      # Assumes Redcarpet ignores option keys it does not know - confirm.
      no_intra_emphasis: true,
      # HTML blocks do not require to be surrounded by an empty line as in
      # the Markdown standard.
      lax_html_blocks: true,
      # Parse strikethrough, PHP-Markdown style. Two `~` characters mark the
      # start of a strikethrough, e.g. `this is ~~good~~ bad`
      strikethrough: true,
      # Render fenced code (```) and (~~~)
      fenced_code_blocks: true,
      # Parse tables, PHP-Markdown style
      tables: true,
    }.freeze

    # HTML renderer with Ultraviolet code lexer
    class UvHtmlRender < Redcarpet::Render::HTML
      puts "\n**************\nMarkupParser::Markdown::UvHtmlRender loaded\n**************\n"

      # Renders one code block through Uv as xhtml with the "railscasts" theme.
      def block_code(code, language)
        Uv.parse(code, "xhtml", language, false, "railscasts")
      end
    end

    # Memoized Redcarpet parser whose code blocks are rendered by UvHtmlRender.
    # Stored in a class-level instance variable rather than an @@ class
    # variable, which would be shared across the whole inheritance tree.
    def self.html_parser_with_code_lexer
      @html_parser_with_code_lexer ||= Redcarpet::Markdown.new(UvHtmlRender, OPTIONS)
    end

    # Memoized Redcarpet parser using the stock HTML renderer.
    def self.html_parser
      @html_parser ||= Redcarpet::Markdown.new(Redcarpet::Render::HTML, OPTIONS)
    end

    # Returns the (memoized) HTML for this markdown text, rendered with the
    # parser currently selected for this instance.
    def html_text
      @html_text ||= parser.render(@original_text)
    end

    # Switches this instance to the parser with the Uv code lexer.
    # Anything rendered earlier with the plain parser is discarded so the
    # switch also takes effect when +html_text+ was already called.
    # Returns self for chaining.
    def stylize_code_blocks
      @parser = Markdown.html_parser_with_code_lexer
      @html_text = nil
      @nokoguri_parser = nil
      self
    end

    private

    # Parser used by this instance; defaults to Markdown.html_parser.
    def parser
      @parser ||= Markdown.html_parser
    end

    # Hack kept from the original design: returns this instance's parser for
    # a later call instead of parsing; rendering happens in +html_text+.
    def parse(text)
      parser
    end

    # Performs the markdown hotfixes on a private copy of the text, so the
    # caller's string is never mutated and frozen input does not raise.
    def hot_fixes(text)
      fixed = text.to_s.dup
      fixed.standardize_newlines!
      fixed.convert_tabs_to_spaces!
      fixed.correct_gh_code_syntax!
      fixed
    end
  end
end
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
# Adds markdown clean-up helpers to the String class.
# All helpers modify the receiver in place and, like String#gsub!, return the
# receiver when something was replaced and nil when nothing matched.
class String
  # Corrects the gh code block syntax mistake where one would write
  # '~~~ .ruby' and the correct code should be '~~~ruby'.
  # Only spaces/tabs may separate the fence from the dot: matching any
  # whitespace would also join a bare '~~~' fence with a following line that
  # happens to start with a dot (e.g. a CSS selector inside the block).
  def correct_gh_code_syntax!
    gsub!(/~~~[ \t]+\.([a-zA-Z]*)/, '~~~\1')
  end

  # Corrects ordered list items written as '1)' into the '1.' form that the
  # markdown parser accepts. Only numbers at the start of a line (after
  # optional indentation) are rewritten, so text such as 'f(1)' is untouched.
  def correct_ol_list_parenth!
    gsub!(/^([ \t]*)(\d+)\)/, '\1\2.')
  end

  # Converts each tab (\t) to 2 spaces.
  def convert_tabs_to_spaces!
    gsub!("\t", "  ")
  end

  # Standardizes line endings: "\r\n" and lone "\r" both become "\n".
  # Done in a single pass so the return value reports any conversion.
  def standardize_newlines!
    gsub!(/\r\n?/, "\n")
  end

  # Strips the spaces that follow a line break when the line has content.
  # NOTE: this is a hack to work around the strange Gollum editor indentation
  # behavior. The line break is matched with an alternation, not a character
  # class, so a literal '|' (markdown tables) is no longer treated as one.
  def sub_newlines!
    gsub!(/(\r\n|\r|\n) *(.)/, '\1\2')
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'rdoc'
require 'rdoc/markup/to_html'
|
2
|
+
module MarkupParser
  # RDoc parser: converts RDoc markup to HTML with RDoc::Markup::ToHtml.
  class Rdoc < MarkupParser::Default
    puts "\n**************\nMarkupParser::Rdoc loaded\n**************\n"

    # Memoized RDoc HTML formatter shared by all instances.
    # Defined above +private+ because that keyword does not apply to
    # `def self.` methods anyway; the method is, and always was, public.
    # RDoc 4 and later require an options object; older releases take none.
    def self.parser
      @parser ||=
        if ::RDoc::VERSION.to_i >= 4
          ::RDoc::Markup::ToHtml.new(::RDoc::Options.new)
        else
          ::RDoc::Markup::ToHtml.new
        end
    end

    private

    # Returns the HTML for the given RDoc text.
    # Forces UTF-8 encoding since RDoc seems to return ASCII-8BIT.
    def parse(text)
      Rdoc.parser.convert(text).force_encoding("UTF-8")
    end

    # Performs the rdoc hotfixes; currently none are needed.
    def hot_fixes(text)
      text
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/markup_parser/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for markup_parser.
Gem::Specification.new do |gem|
  gem.name          = "markup_parser"
  gem.version       = MarkupParser::VERSION
  gem.authors       = ["Quinn"]
  gem.email         = ["chaffeqa@gmail.com"]
  gem.description   = %q{Standardized markup parsers to use a single format: an object. Instantiate a specific markup class with text to output formated Html. Allows for easy code block highlighting using a Proc; defaults to Uv (ruby Ultraviolet)}
  gem.summary       = %q{Standardized markup parsers to a single format. Sole use is for converting markup text to Html. }
  gem.homepage      = "https://github.com/chaffeqa/markup_parser"

  # File lists are taken from git, so the gem must be built from a checkout.
  gem.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.require_paths = ["lib"]

  # Runtime dependencies. The library calls Nokogiri (HTML fragments) and
  # Redcarpet (Markdown rendering) as well as Uv, so all three are declared;
  # previously only uv was, leaving the others to fail at run time.
  gem.add_dependency "uv"
  gem.add_dependency "nokogiri"
  # The Redcarpet::Markdown / Redcarpet::Render API used by the Markdown
  # parser was introduced with the 2.0 series (including its betas).
  gem.add_dependency "redcarpet", ">= 2.0.0.b"
end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: markup_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Quinn
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-09-28 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: uv
|
16
|
+
requirement: &70102402385260 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70102402385260
|
25
|
+
description: ! 'Standardized markup parsers to use a single format: an object. Instantiate
|
26
|
+
a specific markup class with text to output formated Html. Allows for easy code
|
27
|
+
block highlighting using a Proc; defaults to Uv (ruby Ultraviolet)'
|
28
|
+
email:
|
29
|
+
- chaffeqa@gmail.com
|
30
|
+
executables: []
|
31
|
+
extensions: []
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- .gitignore
|
35
|
+
- Gemfile
|
36
|
+
- Rakefile
|
37
|
+
- lib/markup_parser.rb
|
38
|
+
- lib/markup_parser/default.rb
|
39
|
+
- lib/markup_parser/html.rb
|
40
|
+
- lib/markup_parser/markdown.rb
|
41
|
+
- lib/markup_parser/rdoc.rb
|
42
|
+
- lib/markup_parser/version.rb
|
43
|
+
- markup_parser.gemspec
|
44
|
+
homepage: https://github.com/chaffeqa/markup_parser
|
45
|
+
licenses: []
|
46
|
+
post_install_message:
|
47
|
+
rdoc_options: []
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ! '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
requirements: []
|
63
|
+
rubyforge_project:
|
64
|
+
rubygems_version: 1.8.10
|
65
|
+
signing_key:
|
66
|
+
specification_version: 3
|
67
|
+
summary: Standardized markup parsers to a single format. Sole use is for converting
|
68
|
+
markup text to Html.
|
69
|
+
test_files: []
|