mill 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +37 -0
- data/Gemfile +6 -0
- data/LICENSE +21 -0
- data/README.md +4 -0
- data/Rakefile +1 -0
- data/TODO.txt +38 -0
- data/lib/mill.rb +336 -0
- data/lib/mill/file_types.rb +30 -0
- data/lib/mill/html_helpers.rb +166 -0
- data/lib/mill/navigator.rb +84 -0
- data/lib/mill/resource.rb +116 -0
- data/lib/mill/resources/feed.rb +63 -0
- data/lib/mill/resources/generic.rb +15 -0
- data/lib/mill/resources/image.rb +36 -0
- data/lib/mill/resources/redirect.rb +36 -0
- data/lib/mill/resources/robots.rb +25 -0
- data/lib/mill/resources/sitemap.rb +35 -0
- data/lib/mill/resources/text.rb +157 -0
- data/lib/mill/schemas/atom.xsd +244 -0
- data/lib/mill/schemas/sitemap.xsd +116 -0
- data/lib/mill/tasks.rake +31 -0
- data/lib/mill/version.rb +5 -0
- data/mill.gemspec +37 -0
- metadata +247 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
class Mill

  # MIME types grouped by category symbol (:text, :image, :generic).
  # NOTE(review): presumably used to pick the resource class for a file from
  # its MIME type -- confirm against the code that reads this table.
  FileTypes = {
    # Textual content.
    text: %w[text/plain text/html],
    # Raster images and icons.
    image: %w[
      image/gif
      image/jpeg
      image/png
      image/tiff
      image/vnd.microsoft.icon
      image/x-icon
    ],
    # Stylesheets, fonts, scripts and PDF documents.
    generic: %w[
      text/css
      application/font-sfnt
      application/x-font-opentype
      application/x-font-otf
      application/x-font-truetype
      application/x-font-ttf
      application/javascript
      application/x-javascript
      text/javascript
      application/pdf
    ],
  }

end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
# Mixin with helpers for building, parsing and validating HTML through
# Nokogiri and Tidy, plus a few canned HTML snippets.
module HTMLHelpers

  # Raised when HTML fails to parse cleanly or Tidy reports an error.
  # Derives from StandardError (not Exception) so that an ordinary `rescue`
  # handles it like any other application error.
  class HTMLError < StandardError; end

  # Tidy diagnostics that tidy_html skips. Each entry is compared with the
  # text that follows the "Warning:"/"Error:" label of a Tidy message. Blank
  # entries produced by the newlines around the list are dropped so that a
  # diagnostic with an empty description is never ignored by accident.
  IgnoreErrors = %Q{
    <table> lacks "summary" attribute
    <img> lacks "alt" attribute
    <form> proprietary attribute "novalidate"
    <input> attribute "type" has invalid value "email"
    <input> attribute "tabindex" has invalid value "-1"
  }.split(/\n/).map(&:strip).reject(&:empty?)

  # Builds a complete HTML document.
  #
  # Yields the Nokogiri HTML builder and returns the built document.
  def html_document(&block)
    builder = Nokogiri::HTML::Builder.new(encoding: 'utf-8') do |doc|
      yield(doc)
    end
    builder.doc
  end

  # Builds an HTML fragment.
  #
  # Yields a Nokogiri HTML builder attached to an initially empty fragment and
  # returns that fragment.
  def html_fragment(&block)
    fragment = Nokogiri::HTML::DocumentFragment.parse('')
    Nokogiri::HTML::Builder.with(fragment) do |builder|
      yield(builder)
    end
    fragment
  end

  # Parses +str+ as an HTML document and returns it.
  #
  # Raises HTMLError for the first parser error, except "Tag ... invalid"
  # messages, which are skipped.
  def parse_html(str)
    html = Nokogiri::HTML::Document.parse(str) { |config| config.strict }
    html.errors.each do |error|
      next if error.message =~ /^Tag (.*?) invalid$/
      raise HTMLError, "HTML error at line #{error.line}, column #{error.column}: #{error.message}"
    end
    html
  end

  # Runs Tidy over +html+ (anything responding to #to_s) and reports every
  # diagnostic not listed in IgnoreErrors.
  #
  # The report shows each diagnostic followed by up to two lines of context on
  # either side, with the offending character highlighted. It is emitted
  # exactly once: yielded to the block when one is given, otherwise printed to
  # STDERR. Diagnostics after the first one of type :error are not reported.
  #
  # Raises HTMLError when a diagnostic of type :error is encountered.
  def tidy_html(html, &block)
    html_str = html.to_s
    tidy = TidyFFI::Tidy.new(html_str, char_encoding: 'UTF8')
    errors = parse_tidy_errors(tidy).reject do |error|
      IgnoreErrors.include?(error[:error])
    end
    return if errors.empty?

    html_lines = html_str.split(/\n/)
    report = StringIO.new
    report.puts "invalid HTML:"
    fatal = nil
    errors.each do |error|
      report.puts "\t#{error[:msg]}:"
      first = [0, error[:line] - 2].max
      last = [error[:line] + 2, html_lines.length - 1].min
      (first..last).each do |i|
        html_line = html_lines[i]
        if i == error[:line]
          column = error[:column]
          # Split the line around the offending character so it can be shown
          # in reverse video.
          output = [
            column > 0 ? html_line[0 .. column - 1] : '',
            Term::ANSIColor.negative,
            html_line[column],
            Term::ANSIColor.clear,
            html_line[column + 1 .. -1],
          ]
        else
          output = [html_line]
        end
        report.puts "\t\t%3s: %s" % [i + 1, output.join]
      end
      if error[:type] == :error
        # Stop at the first real error; it is raised once the report is out.
        fatal = error
        break
      end
    end

    if block_given?
      yield(report.string)
    else
      STDERR.print(report.string)
    end
    raise HTMLError, "HTML error: #{fatal[:msg]}" if fatal
  end

  # Converts the diagnostics text of +tidy+ (its #errors string, one message
  # per line) into an array of hashes with these keys:
  #
  #   :msg    - the complete message line
  #   :line   - zero-based line number
  #   :column - zero-based column number
  #   :type   - downcased label as a symbol (e.g. :warning, :error)
  #   :error  - the description after the label, stripped
  #
  # Returns [] when there are no diagnostics. Raises RuntimeError for a line
  # that does not have the "line N column M - Label: text" shape.
  def parse_tidy_errors(tidy)
    return [] unless tidy.errors
    tidy.errors.split(/\n/).map do |error_str|
      match = /^line (\d+) column (\d+) - (.*?): (.*)$/.match(error_str)
      raise "Can't parse error: #{error_str}" unless match
      {
        msg: error_str,
        line: match[1].to_i - 1,
        column: match[2].to_i - 1,
        type: match[3].downcase.to_sym,
        error: match[4].strip,
      }
    end
  end

  # Replaces every element of +html+ matching +xpath+ with the value the block
  # returns for that element.
  def replace_element(html, xpath, &block)
    html.xpath(xpath).each do |elem|
      elem.replace(yield(elem))
    end
  end

  # Returns an HTML fragment with a "Buy from Amazon.com" image link for the
  # product identified by +asin+.
  def amazon_button(asin)
    html_fragment do |html|
      html.a(href: "http://www.amazon.com/dp/#{asin}") do
        html.img(src: '/images/buy1._V46787834_.gif', alt: 'Buy from Amazon.com')
      end
    end
  end

  # Returns an HTML fragment with a PayPal "Buy Now" form for the hosted
  # button +id+.
  def paypal_button(id)
    html_fragment do |html|
      html.form(action: 'https://www.paypal.com/cgi-bin/webscr', method: 'post') do
        html.input(
          type: 'hidden',
          name: 'cmd',
          value: '_s-xclick')
        html.input(
          type: 'hidden',
          name: 'hosted_button_id',
          value: id)
        html.input(
          type: 'image',
          src: 'https://www.paypalobjects.com/en_US/i/btn/btn_buynow_LG.gif',
          name: 'submit',
          alt: 'PayPal - The safer, easier way to pay online!')
        html.img(
          alt: '',
          border: 0,
          width: 1,
          height: 1,
          src: 'https://www.paypalobjects.com/en_US/i/scr/pixel.gif')
      end
    end
  end

  # Returns an HTML fragment with the two <script> elements of the classic
  # Google Analytics (ga.js) page tracker for +tracker_id+.
  def google_analytics(tracker_id)
    html_fragment do |html|
      html.script(type: 'text/javascript') do
        html << %Q{
          var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
          document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
        }
      end
      html.script(type: 'text/javascript') do
        html << %Q{
          try {
            var pageTracker = _gat._getTracker("#{tracker_id}");
            pageTracker._trackPageview();
          } catch(err) {}
        }
      end
    end
  end

  # A string whose HTML form is the text wrapped in a <pre> element.
  class PreText < String

    # html_fragment is an instance method of HTMLHelpers; without this include
    # a PreText could only reach it if HTMLHelpers happened to be mixed into
    # Object elsewhere.
    include HTMLHelpers

    # Returns an HTML fragment holding this text inside <pre>.
    def to_html
      html_fragment do |html|
        html.pre(self)
      end
    end

  end

  class ::String

    # Returns the string as HTML text after RubyPants processing, normalised
    # by a round trip through a Nokogiri fragment.
    def to_html
      Nokogiri::HTML::DocumentFragment.parse(RubyPants.new(self).to_html).to_html
    end

  end

end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
class Mill

  # An ordered list of navigation items with lookups relative to a URI.
  class Navigator

    # A single navigation entry: a URI and a title.
    class Item

      attr_reader :uri
      attr_accessor :title

      # Assigns every key of +params+ through the matching writer method.
      def initialize(params={})
        params.each { |name, value| send("#{name}=", value) }
      end

      # Stores +uri+ after parsing it with Addressable::URI.
      def uri=(uri)
        @uri = Addressable::URI.parse(uri)
      end

    end

    attr_accessor :items

    # Starts with no items, then assigns every key of +params+ through the
    # matching writer method.
    def initialize(params={})
      @items = []
      params.each { |name, value| send("#{name}=", value) }
    end

    # Yields each item together with its state relative to +uri+:
    #
    #   :current - the first item with a relative URI equal to +uri+
    #   :within  - only when no item is current: among the items with a
    #              relative URI whose path is a prefix of +uri+'s path, the
    #              one sorted last by number of '/' characters in its path
    #   :other   - every remaining item
    def item_states_for_uri(uri, &block)
      within_item = nil
      current_item = @items.find do |candidate|
        candidate.uri.relative? && candidate.uri == uri
      end
      unless current_item
        enclosing = @items.select do |candidate|
          candidate.uri.relative? && uri.path.start_with?(candidate.uri.path)
        end
        within_item = enclosing.sort_by { |candidate| candidate.uri.path.count('/') }.last
      end
      @items.each do |entry|
        state =
          if entry == current_item
            :current
          elsif entry == within_item
            :within
          else
            :other
          end
        yield(entry, state)
      end
    end

    # Returns the first item, or nil when there are none.
    def first_item
      @items.first
    end

    # Returns the last item, or nil when there are none.
    def last_item
      @items.last
    end

    # Returns the item preceding the one whose URI equals +uri+, or nil when
    # there is no such item or it is the first.
    def previous_item(uri)
      position = find_item_index_by_uri(uri)
      return nil unless position && position > 0
      @items[position - 1]
    end

    # Returns the item following the one whose URI equals +uri+, or nil when
    # there is no such item or it is the last.
    def next_item(uri)
      position = find_item_index_by_uri(uri)
      return nil unless position && position < @items.length - 1
      @items[position + 1]
    end

    # Returns the index of the first item whose URI equals +uri+, or nil.
    def find_item_index_by_uri(uri)
      @items.index { |entry| entry.uri == uri }
    end

  end

end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
class Mill

  # Base class for one resource of a site: an input file and/or in-memory
  # content that is written to an output file.
  class Resource

    attr_reader :input_file
    attr_reader :output_file
    attr_reader :date
    attr_reader :public
    attr_accessor :content
    attr_accessor :mill

    # Type symbol of the resource class; nil here, subclasses override it.
    def self.type
      # implemented by subclass
    end

    # Assigns every key of +params+ through the matching writer method.
    def initialize(params={})
      params.each { |k, v| send("#{k}=", v) }
    end

    # Stores the input location wrapped in a Path.
    def input_file=(p)
      @input_file = Path.new(p)
    end

    # Stores the output location wrapped in a Path.
    def output_file=(p)
      @output_file = Path.new(p)
    end

    # Sets the resource date. Strings are parsed with DateTime.parse, a Time
    # is converted through its string form, Date/DateTime values are stored
    # unchanged.
    #
    # Raises RuntimeError for any other kind of value.
    def date=(x)
      @date = case x
      when String
        DateTime.parse(x)
      when Time
        DateTime.parse(x.to_s)
      when Date, DateTime
        x
      else
        raise "Can't assign date: #{x.inspect}"
      end
    end

    # Sets the public flag from a boolean or from the strings 'true'/'false'.
    #
    # Raises RuntimeError for any other value.
    def public=(x)
      @public = case x
      when 'false', false
        false
      when 'true', true
        true
      else
        raise "Can't assign public: #{x.inspect}"
      end
    end

    # Returns the site-relative URI of the output file: its path relative to
    # the mill's output directory with a leading '/', a trailing
    # "/index.html" reduced to "/", and a trailing ".html" removed when the
    # mill has shorten_uris set.
    #
    # Raises RuntimeError when no output file is defined.
    def uri
      raise "#{@input_file}: No output file defined for #{self.class}" unless @output_file
      path = '/' + @output_file.relative_to(@mill.output_dir).to_s
      path.sub!(%r{/index\.html$}, '/')
      path.sub!(%r{\.html$}, '') if @mill.shorten_uris
      Addressable::URI.parse(path)
    end

    # Returns the mill's site URI joined with this resource's URI.
    def absolute_uri
      @mill.site_uri + uri
    end

    # Returns the mill's tag URI joined with this resource's URI.
    def tag_uri
      @mill.tag_uri + uri
    end

    # Returns the change frequency of this resource; always :weekly here.
    def change_frequency
      :weekly
    end

    # Returns the content to write on build; the raw content by default.
    def final_content
      @content
    end

    # Defaults the date (input file mtime, else the current time) and hands
    # the resource to the mill through update_resource.
    #
    # Raises RuntimeError when there is neither an input file nor content.
    def load
      raise "#{uri} (#{self.class}): no content" unless @input_file || @content
      self.date ||= @input_file ? @input_file.mtime : DateTime.now
      @mill.update_resource(self)
    end

    # Produces the output file, then validates it. When final_content is
    # present it is written and the file times are set to the resource date;
    # otherwise the input file is copied.
    #
    # Raises RuntimeError when there is neither content nor an input file.
    def build
      @output_file.dirname.mkpath
      if (c = final_content)
        # ;;warn "#{uri}: writing #{@input_file} to #{@output_file}"
        @output_file.write(c.to_s)
        @output_file.utime(@date.to_time, @date.to_time)
      elsif @input_file
        # ;;warn "#{uri}: copying #{@input_file} to #{@output_file}"
        @input_file.copy(@output_file)
      else
        raise "Can't build resource without content or input file: #{uri}"
      end
      validate
    end

    # Validates the output file against the schema the mill has for this
    # resource type; does nothing when there is no such schema.
    def validate
      if (schema = @mill.schema_for_type(self.class.type))
        validate_xml(schema)
      end
    end

    # Parses the output file as XML and checks it against +schema+. Parse and
    # schema errors are printed with warn.
    #
    # Raises RuntimeError when any error was found.
    def validate_xml(schema)
      # Block form so the file handle is closed as soon as parsing is done
      # instead of staying open until garbage collection.
      # NOTE(review): assumes Path#open follows File.open's block semantics.
      doc = @output_file.open { |io| Nokogiri::XML::Document.parse(io) }
      errors = doc.errors + schema.validate(doc)
      return if errors.empty?
      errors.each do |error|
        warn "[#{error.file}:#{error.line}:#{error.column}] #{error}"
      end
      raise "#{uri}: Validation failed"
    end

  end

end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# see https://tools.ietf.org/html/rfc4287 (The Atom Syndication Format)
|
2
|
+
|
3
|
+
class Mill

  class Resource

    # Atom feed with one entry per public resource of the mill.
    class Feed < Resource

      include HTMLHelpers

      def self.type
        :feed
      end

      # Builds the Atom document from the mill's public resources, ordered by
      # date, stores it as this resource's content and then runs the regular
      # Resource#load.
      def load
        resources = @mill.public_resources.sort_by(&:date)
        # The feed-level timestamp is the newest resource date; with no
        # public resources at all, fall back to the current time instead of
        # calling #date on nil.
        feed_updated = resources.empty? ? DateTime.now : resources.last.date
        builder = Nokogiri::XML::Builder.new do |xml|
          xml.feed(xmlns: 'http://www.w3.org/2005/Atom') do
            xml.id(@mill.tag_uri)
            xml.generator(*@mill.feed_generator)
            xml.title(@mill.site_title)
            xml.link(rel: 'alternate', type: 'text/html', href: @mill.home_resource.uri)
            xml.link(rel: 'self', type: 'application/atom+xml', href: uri)
            xml.author do
              xml.name(@mill.feed_author_name)
              xml.uri(@mill.feed_author_uri)
              xml.email(@mill.feed_author_email)
            end
            xml.updated(feed_updated.iso8601)
            resources.each do |resource|
              xml.entry do
                # The base Resource class defines no #title, so check for it
                # the same way the optional feed_* methods are checked.
                if resource.respond_to?(:title) && resource.title
                  xml.title(resource.title)
                end
                xml.link(rel: 'alternate', href: resource.uri)
                xml.id(resource.tag_uri)
                xml.updated(resource.date.iso8601)
                xml.published(resource.date.iso8601)
                if resource.respond_to?(:feed_summary)
                  type, data = resource.feed_summary
                  xml.summary(type: type) { xml.cdata(data) } if type && data
                end
                if resource.respond_to?(:feed_content)
                  type, data = resource.feed_content
                  # Same guard as the summary: skip the element when the
                  # resource has nothing to offer.
                  xml.content(type: type) { xml.cdata(data) } if type && data
                end
              end
            end
          end
        end
        @content = builder.doc
        super
      end

      # Returns an HTML fragment with the <link rel="alternate"> element that
      # points at this feed.
      def link_html
        html_fragment do |html|
          html.link(href: uri, rel: 'alternate', type: 'application/atom+xml')
        end
      end

    end

  end

end
|