markup_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
source 'https://rubygems.org'

# Specify your gem's dependencies in markup_parser.gemspec
gemspec
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,11 @@
1
+ require "markup_parser/version"
2
+
3
# Top-level namespace of the gem. Knows where the parser implementation
# files live and can list the markup formats available there.
module MarkupParser
  puts "\n**************\nMarkupParser loaded\n**************\n"

  class << self
    # Directory that is scanned for parser implementation files (*.rb).
    # The accessor has to exist before the assignment below; without it
    # `self.parser_path = ...` raises NoMethodError while the file loads.
    attr_accessor :parser_path

    # Names of the available parsers, derived from the file names found in
    # +parser_path+ (without the '.rb' extension). The "default" base parser
    # is excluded. The list is computed once and memoized.
    #
    # @return [Array<String>] parser names in alphabetical order
    def parsers
      @parsers ||= begin
        files = Dir.glob(File.join(parser_path, '*.rb'))
        # Sorted because Dir.glob ordering is platform dependent on older Rubies.
        files.map { |file| File.basename(file, '.rb') }.sort - ['default']
      end
    end
  end

  self.parser_path = File.join(File.dirname(__FILE__), 'markup_parser')
end
@@ -0,0 +1,109 @@
1
module MarkupParser
  # Base class of all markup parsers. It wraps a piece of text and turns it
  # into HTML; subclasses customise the behaviour by overriding the private
  # hooks #parse and #hot_fixes. This base implementation passes the text
  # through unchanged.
  #
  # Errors raised while parsing are not propagated: they are printed and
  # replaced by a "<p class='parse_error'>" paragraph in the output.
  class Default
    puts "\n**************\nMarkupParser::Default loaded\n**************\n"

    # #nokoguri_parser and #html_text are defined explicitly below, so they
    # are deliberately not listed here (that would only cause redefinition
    # warnings).
    attr_reader :original_text, :lexer_proc

    # @param text [String] the raw markup
    # @param lexer [Proc] optional code highlighter, called as
    #   lexer.call(code, lang) and expected to return HTML; falls back to
    #   the Uv based default lexer when no block is given
    def initialize(text = '', &lexer)
      @original_text = hot_fixes(text)
      @lexer_proc = lexer || default_lexer
    end

    # Returns the document serialized as XHTML, or an error paragraph when
    # anything goes wrong on the way.
    def to_html
      nokoguri_parser.to_xhtml(:save_with => Nokogiri::XML::Node::SaveOptions::AS_XHTML)
    rescue => e
      log_error("#{self.class}#to_html", e, "Putting error message into the output.")
      error_paragraph(e)
    end

    # Lazily builds (and memoizes) the Nokogiri HTML fragment for #html_text.
    # On failure a fragment holding the error paragraph is stored instead.
    def nokoguri_parser
      @nokoguri_parser ||= Nokogiri::HTML::DocumentFragment.parse(html_text)
    rescue => e
      log_error("#{self.class}#nokoguri_parser", e, "Putting error message into the output.")
      @nokoguri_parser = Nokogiri::HTML::DocumentFragment.parse(error_paragraph(e))
    end

    # Lazily runs the markup specific #parse on the original text and
    # memoizes the resulting HTML. On failure the error paragraph is stored.
    def html_text
      @html_text ||= parse(@original_text)
    rescue => e
      log_error("#{self.class}#html_text", e, "Putting error message into the output.")
      @html_text = error_paragraph(e)
    end

    # Replaces every <pre lang="..."> node with the HTML produced by the
    # lexer proc. <pre> nodes without a lang attribute are left alone, and a
    # failure on one node does not stop the remaining nodes from being
    # processed.
    #
    # @return [self] to allow chaining, e.g. stylize_code_blocks.to_html
    def stylize_code_blocks
      nokoguri_parser.search('pre').each do |node|
        begin
          lang = node['lang']
          next unless lang

          node.replace(@lexer_proc.call(node.inner_text, lang))
        rescue => e
          log_error("parsing <pre lang=''> block", e, "Continuing code block parsing.")
        end
      end
      self
    end

    private

    # Default lexer proc: highlights code with Uv using the railscasts theme.
    def default_lexer
      Proc.new { |code, lang| Uv.parse(code, "xhtml", lang, false, "railscasts") }
    end

    # HTML snippet that is put into the output in place of unparsable content.
    def error_paragraph(error)
      "<p class='parse_error'>Error in parsing in #{self.class}: #{error.message}.</p>"
    end

    # Prints an error banner to stdout.
    # +context+ says where the error happened, +action+ what is done about it.
    def log_error(context, error, action)
      puts "\n******************\n" \
           "Error in #{context}.\n" \
           "Reason: #{error.message}.\n" \
           "#{action}\n" \
           "******************\n"
    end

    ######################################################################
    # TO BE OVERRIDDEN

    # Markup specific parser invocation; must return HTML text.
    def parse(text)
      text
    end

    # Performs hotfixes on a per-markup basis before the text is stored.
    def hot_fixes(text)
      text
    end

    ######################################################################
  end
end
@@ -0,0 +1,13 @@
1
module MarkupParser
  # Parser for input that already is HTML. Everything is inherited from
  # Default; the input is stored exactly as it was given.
  class Html < Default
    puts "\n**************\nMarkupParser::Html loaded\n**************\n"

    private

    # HTML specific hotfix hook. There is nothing to correct at the moment,
    # so the text is handed back untouched.
    def hot_fixes(text)
      text
    end
  end
end
@@ -0,0 +1,116 @@
1
module MarkupParser
  # Markdown parser backed by Redcarpet. Code blocks can optionally be
  # highlighted with Uv by calling #stylize_code_blocks.
  class Markdown < MarkupParser::Default
    puts "\n**************\nMarkupParser::Markdown loaded\n**************\n"

    # Selected Markdown options, shared by both renderers.
    OPTIONS = {
      # Parse links even when they are not enclosed in `<>` characters.
      # http, https and ftp links, email addresses and `www.` links
      # without a protocol are detected.
      autolink: true,
      # Stop underscores within words from being treated as emphasis, so
      # Ruby method or variable names with underscores stay intact.
      # Redcarpet 2.x documents this option as :no_intra_emphasis; the
      # original spelling is kept next to it for backward compatibility.
      no_intraemphasis: true,
      no_intra_emphasis: true,
      # HTML blocks do not need to be surrounded by an empty line.
      lax_html_blocks: true,
      # PHP-Markdown style strikethrough, e.g. `this is ~~good~~ bad`.
      strikethrough: true,
      # Renders fenced code (```) and (~~~).
      fenced_code_blocks: true,
      # PHP-Markdown style tables.
      tables: true
    }.freeze

    # HTML renderer that highlights code blocks with the Ultraviolet lexer.
    class UvHtmlRender < Redcarpet::Render::HTML
      puts "\n**************\nMarkupParser::Markdown::UvHtmlRender loaded\n**************\n"

      def block_code(code, language)
        Uv.parse(code, "xhtml", language, false, "railscasts")
      end
    end

    # Shared Redcarpet parser that highlights code blocks; built only once.
    def self.html_parser_with_code_lexer
      @html_parser_with_code_lexer ||= Redcarpet::Markdown.new(UvHtmlRender, OPTIONS)
    end

    # Shared plain Redcarpet parser; built only once.
    def self.html_parser
      @html_parser ||= Redcarpet::Markdown.new(Redcarpet::Render::HTML, OPTIONS)
    end

    # Switches this instance to the highlighting parser. Output that was
    # rendered before the switch is discarded, so the highlighting also
    # takes effect when #html_text has already been called.
    #
    # @return [self] to allow chaining, e.g. stylize_code_blocks.to_html
    def stylize_code_blocks
      @parser = Markdown.html_parser_with_code_lexer
      @html_text = nil
      @nokoguri_parser = nil
      self
    end

    private

    # The Redcarpet parser used by this instance; the plain HTML parser
    # unless #stylize_code_blocks selected the highlighting one.
    def parser
      @parser ||= Markdown.html_parser
    end

    # Renders the markdown text to HTML. It is invoked lazily by the
    # inherited #html_text, which also supplies the error handling, so a
    # parser chosen via #stylize_code_blocks in the meantime is honoured.
    def parse(text)
      parser.render(text)
    end

    # Performs the markdown hotfixes on a copy of the text, so the caller's
    # string is never modified (and frozen strings are accepted).
    def hot_fixes(text)
      fixed = text.to_s.dup
      fixed.standardize_newlines!
      fixed.convert_tabs_to_spaces!
      fixed.correct_gh_code_syntax!
      fixed
    end
  end
end
81
+
82
+
83
+
84
+
85
# Adds in-place markup clean-up helpers to the String class.
# All of them modify the receiver; like the core bang methods their return
# value is nil when nothing was changed, so do not chain them.
class String
  # Corrects the gh code block syntax mistake where one would write
  # '~~~ .ruby' while the correct code is '~~~ruby'.
  def correct_gh_code_syntax!
    gsub!(/~~~\s\.([a-zA-Z]*)/, '~~~\1')
  end

  # Corrects ordered list items written as '1) ...' to the only accepted
  # syntax '1. ...'. Only markers at the start of a line (optionally
  # indented) are touched, so text such as 'foo(1)' is left alone;
  # multi-digit markers are supported.
  def correct_ol_list_parenth!
    gsub!(/^([ \t]*)(\d+)\)/, '\1\2.')
  end

  # Converts each tab (\t) to 2 spaces.
  def convert_tabs_to_spaces!
    gsub!("\t", '  ')
  end

  # Standardizes line endings: "\r\n" and a lone "\r" both become "\n".
  def standardize_newlines!
    gsub!(/\r\n?/, "\n")
  end

  # Strips the spaces that follow a newline, i.e. the leading indentation
  # of the next line.
  # NOTE: this is a hack to work around the strange Gollum editor
  # indentation behavior.
  def sub_newlines!
    # A group, not a character class: '[\r\n|\n]' would also match a
    # literal '|' and swallow the spaces after it.
    gsub!(/(\r\n|\n)[ ]*(.)/, '\1\2')
  end
end
@@ -0,0 +1,26 @@
1
+ require 'rdoc/markup/to_html'
2
module MarkupParser
  # RDoc parser: converts RDoc markup to HTML with RDoc::Markup::ToHtml.
  class Rdoc < MarkupParser::Default
    puts "\n**************\nMarkupParser::Rdoc loaded\n**************\n"

    # Memoized converter shared by all instances. It is kept in a class
    # level instance variable so it cannot clash with other classes in the
    # hierarchy. The method is public on purpose: #parse calls it with an
    # explicit receiver (a bare `private` would not affect a `def self.`
    # method anyway).
    def self.parser
      @parser ||= RDoc::Markup::ToHtml.new
    end

    private

    # Returns the HTML for the given RDoc text.
    # Forces UTF-8 encoding since RDoc seems to return ASCII-8BIT.
    def parse(text)
      Rdoc.parser.convert(text).force_encoding("UTF-8")
    end

    # RDoc specific hotfix hook; currently returns the text unchanged.
    def hot_fixes(text)
      text
    end
  end
end
@@ -0,0 +1,3 @@
1
module MarkupParser
  # Version of the gem; frozen so it cannot be modified at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/markup_parser/version', __FILE__)
3
+
4
# Gem specification for markup_parser. The file lists are taken from git,
# so the gem has to be built from within a git checkout.
Gem::Specification.new do |gem|
  gem.authors       = ["Quinn"]
  gem.email         = ["chaffeqa@gmail.com"]
  gem.description   = %q{Standardized markup parsers to use a single format: an object. Instantiate a specific markup class with text to output formated Html. Allows for easy code block highlighting using a Proc; defaults to Uv (ruby Ultraviolet)}
  gem.summary       = %q{Standardized markup parsers to a single format. Sole use is for converting markup text to Html. }
  gem.homepage      = "https://github.com/chaffeqa/markup_parser"

  gem.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.name          = "markup_parser"
  gem.require_paths = ["lib"]
  gem.version       = MarkupParser::VERSION

  # Runtime dependencies: every library the parsers reference.
  gem.add_dependency "uv"        # default code lexer (Uv.parse)
  gem.add_dependency "nokogiri"  # HTML fragment handling in MarkupParser::Default
  gem.add_dependency "redcarpet" # MarkupParser::Markdown (Redcarpet::Markdown / Redcarpet::Render::HTML)
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: markup_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Quinn
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-09-28 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: uv
16
+ requirement: &70102402385260 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70102402385260
25
+ description: ! 'Standardized markup parsers to use a single format: an object. Instantiate
26
+ a specific markup class with text to output formated Html. Allows for easy code
27
+ block highlighting using a Proc; defaults to Uv (ruby Ultraviolet)'
28
+ email:
29
+ - chaffeqa@gmail.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - .gitignore
35
+ - Gemfile
36
+ - Rakefile
37
+ - lib/markup_parser.rb
38
+ - lib/markup_parser/default.rb
39
+ - lib/markup_parser/html.rb
40
+ - lib/markup_parser/markdown.rb
41
+ - lib/markup_parser/rdoc.rb
42
+ - lib/markup_parser/version.rb
43
+ - markup_parser.gemspec
44
+ homepage: https://github.com/chaffeqa/markup_parser
45
+ licenses: []
46
+ post_install_message:
47
+ rdoc_options: []
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ! '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ requirements: []
63
+ rubyforge_project:
64
+ rubygems_version: 1.8.10
65
+ signing_key:
66
+ specification_version: 3
67
+ summary: Standardized markup parsers to a single format. Sole use is for converting
68
+ markup text to Html.
69
+ test_files: []