augusts_fancy_blog_post_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 99d7da55d26f476768aa953faba2cfbd46df0db9
4
+ data.tar.gz: e240d0f31fc833d20c9711294fe66291d989eebd
5
+ SHA512:
6
+ metadata.gz: 389ec05498d9c781fc24dfeaa4afced9a8a20d2df0eb0c46f13bca4f5cbd6df6b669193d6047bdad20bc927b9e3c3a7a66b9aba3ca0d92ef66730359a1f331c1
7
+ data.tar.gz: a5a18957a04d413e1481a00f89bdcd1572833be1630e40d98e643cd3ca3b1caf6f8dc1529fc6ab3ed73334e208f0c031cd412098b98f69becd8432178dbf1a77
@@ -0,0 +1,13 @@
1
# Gem specification for the augusts_fancy_blog_post_parser gem.
#
# Declares the gem's identity, the files it ships, and its two runtime
# dependencies (nokogiri and parallel).
Gem::Specification.new do |spec|
  # Identity
  spec.name    = "augusts_fancy_blog_post_parser"
  spec.version = "0.1.0"
  spec.summary = "August's fancy blog post parser"

  # Authorship
  spec.authors = ["August Lilleaas"]
  spec.email   = "august@augustl.com"

  # Packaged files: the library itself plus this gemspec.
  spec.files = %w[
    lib/augusts_fancy_blog_post_parser.rb
    augusts_fancy_blog_post_parser.gemspec
  ]

  # Runtime dependencies, pinned pessimistically to a minor series.
  spec.add_runtime_dependency "nokogiri", "~> 1.5.0"
  spec.add_runtime_dependency "parallel", "~> 0.7.0"
end
@@ -0,0 +1,124 @@
1
require "open3"
require "time"

require "nokogiri"
require "parallel"
4
+
5
# Parses a blog post file made of "key: value" header lines followed by an
# HTML body, and syntax-highlights every <code data-lang="..."> element in
# the body by piping its contents through the external `pygmentize` command.
#
# Header lines are read from the top of the file until the first line that
# does not look like a header; everything after that is the body.
class AugustsFancyBlogPostParser
  # Matches one <code ...>...</code> element. Capture 1 is the raw attribute
  # string (including its leading whitespace), capture 2 is the contents.
  CODE_ELEMENT = /\<code(.*?)\>(.*?)\<\/code\>/m

  # Upper bound on concurrent highlight workers, so a post with many code
  # elements does not spawn one thread and one pygmentize process per element.
  MAX_HIGHLIGHT_THREADS = 8

  # @return [String] site-relative URL: "/" plus the path without extension
  attr_reader :url
  # @return [String] the post body with code elements formatted
  attr_reader :body

  # Escapes HTML-significant characters. Ampersands that already start an
  # entity (e.g. "&lt;") are left alone so text is not double-escaped.
  #
  # @param html [String]
  # @return [String]
  def self.escape_html(html)
    html
      .gsub(/&(?!\w+;)/, '&amp;')
      .gsub(/</, '&lt;')
      .gsub(/>/, '&gt;')
      .gsub(/"/, '&quot;')
  end

  # Turns a raw HTML attribute string (e.g. ' data-lang="ruby"') into a hash
  # of attribute name => attribute value.
  #
  # @param attrs [String, nil] text found between "<code" and ">"
  # @return [Hash{String => String}] empty when attrs is nil or whitespace
  def self.parse_html_attrs(attrs)
    # Plain-Ruby emptiness check; String#blank? only exists with ActiveSupport.
    return {} if attrs.nil? || attrs.strip.empty?

    # Let Nokogiri do the attribute parsing by wrapping the text in a dummy tag.
    element = Nokogiri::HTML("<div#{attrs}></div>").css("div").first
    element.attributes.each_with_object({}) do |(name, attribute), parsed|
      parsed[name] = attribute.value
    end
  end

  # Wraps code in a <code> element, highlighted by pygmentize when a language
  # is given.
  #
  # @param code [String] the code text
  # @param lang [String, nil] pygmentize lexer name; nil means no highlighting
  # @return [String] a <code class="highlight"> element on success, otherwise
  #   a plain <code> element containing the escaped code
  # @raise [SystemCallError] if the pygmentize executable cannot be started
  def self.format_code(code, lang)
    plain = "<code>#{escape_html(code)}</code>"
    return plain unless lang

    # capture3 feeds stdin and drains stdout/stderr concurrently, so large
    # inputs or noisy stderr cannot dead-lock on a full pipe.
    highlighted, _errors, status = Open3.capture3(
      "pygmentize", "-l", lang, "-f", "html", "-P", "nowrap=true",
      stdin_data: code
    )

    # A failed run (e.g. unknown lexer) would otherwise yield an empty element
    # and silently drop the code; fall back to the unhighlighted form instead.
    return plain unless status.success?

    "<code class=\"highlight\">#{highlighted.rstrip}</code>"
  end

  # Splits str around every match of re, keeping the matches.
  #
  # @param str [String] text to split
  # @param re [Regexp] separator pattern
  # @param result [Array] accumulator that the pieces are appended to
  # @return [Array<String, MatchData>] alternating text and MatchData items,
  #   always ending with the (possibly empty) text after the last match
  def self.separate_on(str, re, result = [])
    remaining = str

    while (match_data = remaining.match(re))
      # A zero-width match at the very start makes no progress; stop rather
      # than loop forever.
      break if match_data.post_match.length == remaining.length

      result.push(match_data.pre_match, match_data)
      remaining = match_data.post_match
    end

    result.push(remaining)
    result
  end

  # @param cwd [String] directory the post path is relative to
  # @param path [String] path of the post file, relative to cwd
  def initialize(cwd, path)
    @path = path
    @cwd = cwd

    # Only strip an extension that exists: with an empty extension the slice
    # 0...-0 would be empty and the URL would collapse to "/".
    extension = File.extname(@path)
    stem = extension.empty? ? @path : @path[0...-extension.length]
    @url = "/" + stem

    reload!
  end

  # Formats every <code> element in body and returns the reassembled HTML.
  # Elements are formatted concurrently; text between them is passed through.
  #
  # @param body [String] raw post body
  # @return [String]
  # @raise [ArgumentError] if a chunk is neither a String nor a MatchData
  def self.parse_body(body)
    chunks = separate_on(body, CODE_ELEMENT)
    workers = [chunks.length, MAX_HIGHLIGHT_THREADS].min

    Parallel.map(chunks, in_threads: workers) do |chunk|
      case chunk
      when String
        chunk
      when MatchData
        attrs = parse_html_attrs(chunk[1])
        format_code(chunk[2], attrs["data-lang"])
      else
        raise ArgumentError, "Unexpected chunk #{chunk.class}."
      end
    end.join("")
  end

  # Re-reads the post file from disk and rebuilds the headers and the body.
  #
  # @return [String] the newly parsed body
  def reload!
    html = File.read(File.join(@cwd, @path))

    @headers = {}
    num_header_chars = 0

    html.each_line do |line|
      # The header section ends at the first line without a "key:value" shape.
      break unless line =~ /[a-z]+\:./

      key, _, value = line.partition(":")
      @headers[key] = value.chomp
      num_header_chars += line.length
    end

    # Everything after the consumed header lines is the body.
    @body = self.class.parse_body(html[num_header_chars..-1])
  end

  # @return [String] the URL with every "/" replaced by ":"
  def id
    url.tr("/", ":")
  end

  # @return [String] the "title" header with surrounding whitespace removed
  # @raise [NoMethodError] if the post has no "title" header
  def title
    @headers["title"].strip
  end

  # Parses the "date" header; raises if the header is missing or unparsable.
  #
  # @return [Time]
  def timestamp
    Time.parse(@headers["date"])
  end

  # @return [String] the post date as YYYY-MM-DD
  def html_date
    timestamp.strftime("%Y-%m-%d")
  end

  # @return [String] the post date formatted like "December 30, 2013"
  def display_date
    timestamp.strftime("%B %d, %Y")
  end
end
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: augusts_fancy_blog_post_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - August Lilleaas
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-12-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 1.5.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 1.5.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: parallel
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 0.7.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 0.7.0
41
+ description:
42
+ email: august@augustl.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - lib/augusts_fancy_blog_post_parser.rb
48
+ - augusts_fancy_blog_post_parser.gemspec
49
+ homepage:
50
+ licenses: []
51
+ metadata: {}
52
+ post_install_message:
53
+ rdoc_options: []
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ requirements: []
67
+ rubyforge_project:
68
+ rubygems_version: 2.0.14
69
+ signing_key:
70
+ specification_version: 4
71
+ summary: August's fancy blog post parser
72
+ test_files: []