augusts_fancy_blog_post_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 99d7da55d26f476768aa953faba2cfbd46df0db9
4
+ data.tar.gz: e240d0f31fc833d20c9711294fe66291d989eebd
5
+ SHA512:
6
+ metadata.gz: 389ec05498d9c781fc24dfeaa4afced9a8a20d2df0eb0c46f13bca4f5cbd6df6b669193d6047bdad20bc927b9e3c3a7a66b9aba3ca0d92ef66730359a1f331c1
7
+ data.tar.gz: a5a18957a04d413e1481a00f89bdcd1572833be1630e40d98e643cd3ca3b1caf6f8dc1529fc6ab3ed73334e208f0c031cd412098b98f69becd8432178dbf1a77
augusts_fancy_blog_post_parser.gemspec ADDED
@@ -0,0 +1,13 @@
1
# Gem specification for augusts_fancy_blog_post_parser 0.1.0.
Gem::Specification.new do |spec|
  # Identity of the package.
  spec.name    = "augusts_fancy_blog_post_parser"
  spec.version = "0.1.0"
  spec.summary = "August's fancy blog post parser"

  # Author contact details.
  spec.authors = ["August Lilleaas"]
  spec.email   = "august@augustl.com"

  # Files shipped inside the packaged gem.
  spec.files = [
    "lib/augusts_fancy_blog_post_parser.rb",
    "augusts_fancy_blog_post_parser.gemspec"
  ]

  # Runtime dependencies, both declared with the pessimistic (~>) operator.
  spec.add_runtime_dependency "nokogiri", "~> 1.5.0"
  spec.add_runtime_dependency "parallel", "~> 0.7.0"
end
lib/augusts_fancy_blog_post_parser.rb ADDED
@@ -0,0 +1,124 @@
1
require "nokogiri"
require "parallel"
require "open3"
require "time"
4
+
5
# Parses a blog post file made of leading "key:value" header lines followed by
# an HTML body. Every <code ...>...</code> section of the body is rewritten:
# sections carrying a data-lang attribute are piped through the external
# `pygmentize` command, all others are HTML-escaped.
class AugustsFancyBlogPostParser
  # "/" followed by the post's relative path with its file extension removed.
  attr_reader :url
  # The post body after all <code> sections have been formatted.
  attr_reader :body

  # Escapes text for inclusion in HTML.
  #
  # An ampersand that already starts something shaped like a named entity
  # ("&word;") is left untouched so existing entities are not escaped twice.
  #
  # @param html [String] raw text
  # @return [String] the text with &, <, > and " replaced by entities
  def self.escape_html(html)
    html
      .gsub(/&(?!\w+;)/, '&amp;')
      .gsub(/</, '&lt;')
      .gsub(/>/, '&gt;')
      .gsub(/"/, '&quot;')
  end

  # Turns the raw attribute text of a tag into a Hash.
  #
  # @param attrs [String, nil] attribute text exactly as it appeared after the
  #   tag name, e.g. ' data-lang="ruby"'
  # @return [Hash{String => String}] attribute names mapped to their values;
  #   empty when attrs is nil or contains only whitespace
  def self.parse_html_attrs(attrs)
    # String#blank? is an ActiveSupport extension that this file never loads,
    # so the emptiness check is spelled out with core methods.
    return {} if attrs.nil? || attrs.strip.empty?

    # Let Nokogiri do the attribute parsing by wrapping the text in a dummy div.
    element = Nokogiri::HTML("<div#{attrs}></div>").css("div")[0]
    element.attributes.each_with_object({}) do |(name, attribute), result|
      result[name] = attribute.value
    end
  end

  # Wraps a piece of code in a <code> element.
  #
  # With a lang, the code is highlighted by running `pygmentize` and the
  # element gets class="highlight". Without a lang, or when pygmentize exits
  # with a failure status, the code is only HTML-escaped.
  #
  # @param code [String] source code to format
  # @param lang [String, nil] lexer name handed to `pygmentize -l`
  # @return [String] a <code> element containing the formatted code
  def self.format_code(code, lang)
    return "<code>" + escape_html(code) + "</code>" unless lang

    # capture3 feeds stdin and drains stdout and stderr concurrently. Writing
    # all input first and never reading stderr can block forever once a pipe
    # buffer fills up.
    formatted, _errors, status = Open3.capture3(
      "pygmentize", "-l", lang, "-f", "html", "-P", "nowrap=true",
      :stdin_data => code
    )

    # A failed run (e.g. unknown lexer) must not swallow the code itself.
    return "<code>" + escape_html(code) + "</code>" unless status.success?

    "<code class=\"highlight\">#{formatted.rstrip}</code>"
  end

  # Splits str around every match of re, keeping the matches.
  #
  # Implemented as a loop rather than recursion so the number of matches is
  # not limited by the call stack. re must not match the empty string, since
  # such a match never consumes input.
  #
  # @param str [String] text to split
  # @param re [Regexp] separator pattern
  # @param result [Array] accumulator that the pieces are appended to
  # @return [Array<String, MatchData>] alternating text and MatchData entries,
  #   always ending with the text after the last match
  def self.separate_on(str, re, result = [])
    remaining = str
    while (match_data = remaining.match(re))
      result.push(match_data.pre_match)
      result.push(match_data)
      remaining = match_data.post_match
    end
    result.push(remaining)
    result
  end

  # Reads and parses the post immediately.
  #
  # @param cwd [String] directory that path is relative to
  # @param path [String] path of the post file relative to cwd
  def initialize(cwd, path)
    @path = path
    @cwd = cwd

    extension = File.extname(@path)
    # Slicing with a negative extension length breaks for extensionless paths:
    # -0 is 0, which would leave an empty stem.
    stem = extension.empty? ? @path : @path[0...-extension.length]
    @url = "/" + stem

    reload!
  end

  # Formats every <code ...>...</code> section of body and returns the
  # reassembled text. Chunks are processed through Parallel.map with one
  # thread per chunk.
  #
  # @param body [String] HTML body of a post
  # @return [String] body with each <code> section replaced by format_code output
  # @raise [ArgumentError] if a chunk is neither a String nor a MatchData
  def self.parse_body(body)
    chunks = separate_on(body, /\<code(.*?)\>(.*?)\<\/code\>/m)
    Parallel.map(chunks, :in_threads => chunks.length) do |chunk|
      case chunk
      when String
        # Text between code sections passes through unchanged.
        chunk
      when MatchData
        # Capture 1 is the raw attribute text, capture 2 the code itself.
        attrs = parse_html_attrs(chunk[1])
        format_code(chunk[2], attrs["data-lang"])
      else
        raise ArgumentError, "Unexpected chunk #{chunk.class}."
      end
    end.join("")
  end

  # Re-reads the post file from disk and rebuilds the headers and body.
  #
  # Leading lines that contain lowercase letters followed by ":" and at least
  # one more character are consumed as headers; the first line that does not
  # look like that starts the body.
  def reload!
    html = File.read(File.join(@cwd, @path))

    @headers = {}
    num_header_chars = 0

    html.each_line do |line|
      break unless line =~ /[a-z]+\:./

      # Split on the first colon only, so values may themselves contain ":".
      key, _, value = line.partition(":")
      @headers[key] = value.chomp
      num_header_chars += line.length
    end

    # Everything after the consumed header lines is the body.
    @body = self.class.parse_body(html[num_header_chars..-1])
  end

  # @return [String] the url with every "/" replaced by ":"
  def id
    url.tr("/", ":")
  end

  # @return [String] the "title" header with surrounding whitespace removed
  # @raise [NoMethodError] if the post has no "title" header
  def title
    @headers["title"].strip
  end

  # Parses the "date" header; this fails if the post has no such header.
  #
  # @return [Time] the parsed "date" header
  def timestamp
    Time.parse(@headers["date"])
  end

  # @return [String] the timestamp formatted as YYYY-MM-DD
  def html_date
    timestamp.strftime("%Y-%m-%d")
  end

  # @return [String] the timestamp formatted like "December 30, 2013"
  def display_date
    timestamp.strftime("%B %d, %Y")
  end
end
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: augusts_fancy_blog_post_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - August Lilleaas
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-12-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 1.5.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 1.5.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: parallel
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 0.7.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: 0.7.0
41
+ description:
42
+ email: august@augustl.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - lib/augusts_fancy_blog_post_parser.rb
48
+ - augusts_fancy_blog_post_parser.gemspec
49
+ homepage:
50
+ licenses: []
51
+ metadata: {}
52
+ post_install_message:
53
+ rdoc_options: []
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ requirements: []
67
+ rubyforge_project:
68
+ rubygems_version: 2.0.14
69
+ signing_key:
70
+ specification_version: 4
71
+ summary: August's fancy blog post parser
72
+ test_files: []