docter 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,303 @@
1
+ module Docter
2
+
3
module HTML

  module_function

  # :call-seq:
  #   inner_text_from(html) => text
  #
  # Returns the inner text from some HTML text, effectively stripping the tags, normalizing whitespaces
  # and stripping leading/trailing spaces.
  #
  # Only paired elements (<tag ...>...</tag>) are unwrapped. The substitution is repeated until the
  # text stops changing, so that nested elements such as <a><code>x</code></a> are fully unwrapped.
  def inner_text_from(html)
    text = html
    loop do
      # Each successful pass removes at least one pair of tags, so the text shrinks and the loop ends.
      unwrapped = text.gsub(/<(\w*).*?>(.*?)<\/\1\s*>/m, "\\2")
      break if unwrapped == text
      text = unwrapped
    end
    text.strip.gsub(/\s+/m, " ")
  end

  # :call-seq:
  #   regexp_element(name) => regexp
  #
  # Returns a case-insensitive, multi-line regular expression matching a paired element. The name is
  # interpolated as a regular expression fragment (e.g. "h[23]", "title|h1"). Captures: 1 is the tag
  # name, 2 the attributes text, 3 the element content.
  def regexp_element(name)
    Regexp.new("<(#{name})\\s*(.*?)>(.*?)<\\/\\1\\s*>", Regexp::MULTILINE | Regexp::IGNORECASE)
  end

  # :call-seq:
  #   regexp_attribute(name) => regexp
  #
  # Returns a case-insensitive, multi-line regular expression matching a quoted attribute. Captures:
  # 1 is the attribute name, 2 the quote character, 3 the attribute value.
  def regexp_attribute(name)
    Regexp.new("(#{name})=([\"'])(.*?)\\2", Regexp::MULTILINE | Regexp::IGNORECASE)
  end

end
25
+
26
+
27
+ # Base class for resources like pages, templates, ToC, and anything else that you can create dynamically,
28
+ # or load from a file. It's the second usage that's more interesting: when coming from a file, the resource
29
+ # is created lazily, and you can detect when it is modified and reload it.
30
+ #
31
+ # A class that inherits from Resource must: a) call #load before using any value obtained from the resource
32
+ # (e.g page title), and b) implement one or more create_from_[format] methods for each content format it
33
+ # supports (e.g. create_from_textile).
34
+ module Resource
35
+
36
# Maps various filename extensions to the appropriate format. You only need to use this when the filename
# extension is not the same as the format, e.g. map ".txt" to :plain, but not necessary to map ".textile".
EXTENSIONS = { ""=>:plain, ".txt"=>:plain, ".text"=>:plain, ".thtml"=>:textile, ".mhtml"=>:markdown }

class << self

  # :call-seq:
  #   format_from(filename) => symbol
  #
  # Returns the format based on the filename. Basically uses the filename extension, possibly mapped
  # using EXTENSIONS, and returns :plain if the filename has no extension.
  #
  # The extension is looked up as written first, then in lower case, so "NOTES.TXT" maps to :plain and
  # "Intro.Textile" to :textile. A trailing dot with nothing after it counts as no extension.
  def format_from(filename)
    ext = File.extname(filename)
    ext = "" if ext == "."   # Some Ruby versions report "." for names like "foo."
    EXTENSIONS[ext] || EXTENSIONS[ext.downcase] || ext.downcase[1..-1].to_sym
  end

end
53
+
54
+
55
module Reloadable

  # The filename, if this resource comes from a file, otherwise nil.
  attr_reader :filename

  # :call-seq:
  #   modified() => time
  #
  # Returns the date/time this resource was last modified. If the resource comes from a file,
  # the timestamp of the file, otherwise the time when the resource was created.
  def modified()
    @filename ? File.mtime(@filename) : @modified
  end

  # :call-seq:
  #   modified?() => boolean
  #
  # Returns true if the resource was modified since it was last (re)loaded. Only applies to resources
  # created from a file, all other resources return false.
  def modified?()
    @filename ? File.mtime(@filename) > @modified : false
  end

  # :call-seq:
  #   reload()
  #
  # Reloads the resource. Only applies to resources created from a file, otherwise acts like load.
  # You can safely call it for all resources, for example:
  #   page.reload if page.modified?
  def reload()
    @loaded = false if @filename
    load
  end

  def to_s() #:nodoc:
    @filename || super
  end

protected

  # See Base::new.
  #
  # Accepts (filename, options?), (format, content, options?) or (options?) with a block, and records
  # how to load the resource later. Nothing is loaded here. Raises ArgumentError for any other
  # combination of arguments.
  def init_from(*args, &block)
    options = Hash === args.last ? args.pop : {}
    case args.first
    when String
      @filename = args.shift
      raise ArgumentError, "Expecting file name and options, found too many arguments." unless args.empty?
      # We'll load the file later, but we need to know the mtime in case someone calls modified?/reload first.
      @modified = File.mtime(@filename)
      @load_using = lambda do
        puts "Loading #{filename}"
        # We need to know when the file we're about to read was last modified, but only keep the new mtime
        # if we managed to read it. We're avoiding race conditions with a user editing this file.
        modified = File.mtime(@filename)
        create Resource.format_from(@filename), File.read(@filename), options
        @modified = modified
      end
    when Symbol
      @modified = Time.now # Best guess
      format, content = args.shift, args.shift
      raise ArgumentError, "Expecting format (as symbol) followed by content (string), found too many arguments." unless args.empty?
      @load_using = lambda { create format, content, options }
    else
      if args.empty? && block
        @modified = Time.now # Best guess
        @load_using = lambda { block.call options }
      else
        raise ArgumentError, "Expecting file name, or (format, content), not sure what to do with these arguments."
      end
    end
  end

  # :call-seq:
  #   load()
  #
  # Loads the resource. Call this method before anything that depends on the content of the resource,
  # for example:
  #   def title()
  #     load
  #     @title # Created by load
  #   end
  #
  # Does nothing if the resource is already loaded. If loading raises, the resource stays unloaded.
  def load()
    unless @loaded
      @load_using.call
      @loaded = true
    end
  end

  # :call-seq:
  #   create(format, content, options)
  #
  # Creates the resource using the specified format, content and options passed during construction.
  #
  # This method may be called multiple times, specifically each time the resource is loaded. Override,
  # if you need to perform any clean up to assure proper creation, etc. Otherwise, just let it delegate
  # to a create_from_[format] method, such as create_from_textile.
  #
  # Raises ArgumentError if there is no create_from_[format] method.
  def create(format, content, options)
    method = "create_from_#{format}"
    # Pass true to include protected/private methods: create_from_[format] methods are usually not
    # public, and respond_to? ignores those by default.
    if respond_to?(method, true)
      send method, content, options
    else
      raise ArgumentError, "Don't know how to create #{self} from :#{format}."
    end
  end

  # :call-seq:
  #   erb_this(content, binding?) => content
  #
  # Passes the content through ERB processing. Nothing fancy, but allows you to run filters,
  # include files, generate timestamps, calculate sales tax, etc.
  #
  # Without a binding, ERB's own default binding is used. Passing nil through explicitly would
  # evaluate the template inside the ERB object instead.
  def erb_this(content, binding = nil)
    template = ERB.new(content)
    binding ? template.result(binding) : template.result
  end

end
170
+
171
+
172
class Base

  include Reloadable, HTML

  # :call-seq:
  #   new(filename, options?)
  #   new(format, content, options?)
  #   new(options?) { |options| ... }
  #
  # With a filename, the resource is loaded from that file and the format is decided from the
  # filename. Such a resource can detect modification and be reloaded, for example:
  #   page.reload if page.modified?
  #
  # With a format (symbol) and content, the resource is created from that content. This one you
  # cannot reload. For example:
  #   Page.new(:plain, "HAI")
  #
  # With only a block, the resource is created by calling the block with the supplied options.
  def initialize(*args, &block)
    init_from(*args, &block)
  end

end
195
+
196
+ end
197
+
198
+
199
# Table of contents.
#
# A ToC is an array of entries, each entry providing a link to and a title, and may itself be a ToC.
#
# Supports the Enumerable methods for operating on the entries, in addition to the methods each,
# first/last, size, empty? and index/include?. Use #add to create new entries.
#
# Use #to_html to transform to an HTML ordered list.
class ToC

  include Enumerable

  # Array of entries.
  attr_reader :entries

  # Create new ToC with no entries.
  def initialize()
    @entries = []
  end

  ARRAY_METHODS = ["each", "first", "last", "size", "empty?", "include?", "index", "[]"]

  # Delegate the Enumerable and Array methods to the entries array. Method names are compared as
  # strings because Enumerable.instance_methods returns symbols on current Rubies. Otherwise
  # "entries" is not excluded and the delegator replaces the attribute reader, recursing forever.
  delegated = (Enumerable.instance_methods.map { |name| name.to_s } + ARRAY_METHODS).uniq - ["entries"]
  delegated.each do |method|
    define_method(method) { |*args, &block| entries.send(method, *args, &block) }
  end

  # :call-seq:
  #   add(url, title) => entry
  #   add(entry) => entry
  #
  # Adds (and returns) a new entry. The first form creates an entry with a link (must be a valid URL,
  # use CGI.escape if necessary) and HTML-encoded title. The second form adds an existing entry,
  # for example to a page.
  #
  # Raises ArgumentError if an existing entry is followed by more arguments.
  def add(*args)
    if ToCEntry === args.first
      entry = args.shift
      raise ArgumentError, "Can only accept a ToCEntry argument." unless args.empty?
    else
      entry = ToCEntry.new(*args)
    end
    entries << entry
    entry
  end

  # :call-seq:
  #   to_html(options) => html
  #
  # Transforms this ToC into an HTML ordered list (OL) by calling to_html on each ToC entry.
  #
  # You can use the following options:
  # * :nested -- For entries that are also ToC, expands them as well. You can specify how many
  #   levels (e.g. 1 to expand only once), or true to expand all levels.
  # * :class -- Class to apply to the OL element.
  #
  # The +options+ argument can take the form of a Hash, list of symbols or both. Symbols are
  # treated as +true+ for example:
  #   to_html(:nested, :class=>"toc")
  # Is the same as:
  #   to_html(:nested=>true, :class=>"toc")
  def to_html(*args)
    options = Hash === args.last ? args.pop.clone : {}
    args.each { |arg| options[arg.to_sym] = true }
    cls = %{ class="#{options[:class]}"} if options[:class]
    # Join explicitly: interpolating the array itself yields its inspect form on current Rubies.
    items = map { |entry| entry.to_html(options) }.join
    %{<ol #{cls}>#{items}</ol>}
  end

end


# Table of contents entry.
class ToCEntry < ToC

  # The URL for this entry.
  attr_reader :url

  # The title of this entry.
  attr_reader :title

  # :call-seq:
  #   new(url, title)
  #
  # URL links to the ToC entry, and must be a valid URL (use CGI.escape if necessary). The title must
  # be HTML-encoded (use CGI.escapeHTML if necessary).
  def initialize(url, title)
    super()
    @url, @title = url, title
  end

  # :call-seq:
  #   to_html(nested?) => html
  #
  # Transforms this ToC entry into an HTML list item (LI). Depending on the :nested option,
  # can also expand nested ToC: a positive number expands that many levels below this entry,
  # zero (or less) expands nothing, and any other true value expands all levels.
  def to_html(*args)
    options = Hash === args.last ? args.pop.clone : {}
    args.each { |arg| options[arg.to_sym] = true }
    depth = options[:nested]
    nested = nil
    if depth && !empty?
      if Numeric === depth
        # Zero is true in Ruby, so the remaining depth must be compared explicitly.
        nested = super(options.merge(:nested=>depth - 1)) if depth > 0
      else
        nested = super(options)
      end
    end
    %{<li><a href="#{url}">#{title}</a>#{nested}</li>}
  end

end
302
+
303
+ end
@@ -0,0 +1,288 @@
1
+ module Docter
2
+
3
+ # A single documentation page. Has title, content and ToC.
4
+ #
5
+ # The content is HTML without the H1 header or HEAD element, ripe for including inside the template.
6
+ # The title is HTML-encoded text, the ToC is created from H2/H3 elements.
7
+ #
8
+ # The content is transformed in three stages:
9
+ # # Transform from the original format (e.g. Textile, plain text) to HTML.
10
+ # # Parse the HTML to extract the body, title and ToC. The content comes from the body, less any
11
+ # H1 element used for the title.
12
+ # # Apply filters each time the content is retrieved (from #content).
13
+ #
14
+ # Supported input formats include:
15
+ # * :plain -- Plain text, rendered as pre-formatted (pre).
16
+ # * :html -- The HTML body is extracted as the content, see below for ERB, title and ToC.
17
+ # * :textile -- Converted to HTML using RedCloth. See below for ERB, code blocks, title and ToC.
18
+ # * :markdown -- Converted to HTML using RedCloth. See below for ERB, code blocks, title and ToC.
19
+ #
20
+ # *ERB* To support dynamic content some formats are run through ERB first. You can use ERB to construct
21
+ # HTML, Textile, Markdown or content in any format the page is using. This happens before the content
22
+ # is converted to HTML.
23
+ #
24
+ # *Code blocks* Textile and Markdown support code blocks with syntax highlighting. To create a code block:
25
+ # {{{!lang
26
+ # ...
27
+ # }}}
28
+ # You can use !lang to specify a language for syntax highlighting, e.g. !ruby, !sql, !sh. See Syntax
29
+ # for more information. The language is optional, code blocks without it are treated as plain text.
30
+ # You can also use syntax highlighting from HTML by specifying the class attribute on the pre element.
31
+ #
32
+ # *Title* The recommended way to specify the page title is using an H1 header. Only one H1 header is allowed,
33
+ # and that element is removed from the content. Alternatively, you can also use the TITLE element, if both
34
+ # TITLE and H1 are used, they must match.
35
+ #
36
+ # If none of these options are available (e.g. for :plain) the title comes from the filename, treating
37
+ # underscore as space and capitalizing first letter, e.g. change_log.txt becomes "Change Log".
38
+ #
39
+ # *ToC* The table of contents is constructed from H2 and H3 headers. H2 headers provide the top-level sections,
40
+ # and H3 headers are nested inside H2 headers.
41
+ #
42
+ # The ToC links to each section based on the ID attribute of the header. If the header lacks an ID attribute,
43
+ # one is created using the header title, for example:
44
+ # h2. Getting Started
45
+ # becomes:
46
+ # <h2 id="getting_started">Getting Started</h2>
47
+ # You can rely on these IDs to link inside the page and across pages.
48
+ #
49
+ # *Filters* Runs the default chain of filters, or those specified by the :filters option. See Filter
50
+ # for more information. Filters are typically used to do post-processing on the HTML, e.g. syntax highlighting,
51
+ # URL rewriting.
52
+ class Page < Resource::Base
53
+
54
# ToC entry that stands in for a whole page: the title, URL and nested entries are all read
# from the page each time they are asked for, rather than stored in the entry.
class ToCEntryForPage < ToCEntry #:nodoc:

  # Keeps a reference to the page only. Does not call the superclass initializer, since
  # every value comes from the page.
  def initialize(page)
    @page = page
  end

  # The page title.
  def title
    @page.title
  end

  # The page path.
  def url
    @page.path
  end

  # The entries of the page's own ToC.
  def entries
    @page.toc.entries
  end

end
75
+
76
# :call-seq:
#   title() => string
#
# Returns the page title. Calls load first, so the title is available on first use.
def title
  load
  @title
end
84
+
85
# :call-seq:
#   title = string
#
# Sets the page title. The page is loaded first: loading assigns the title found in the content,
# so a title set before the first load would otherwise be silently overwritten. Reloading the
# page replaces the title again.
def title=(title)
  load
  @title = title
end
88
+
89
# :call-seq:
#   content() => string
#
# Returns the page content (HTML). Calls load first, then passes the stored content through
# Filter.process on every call.
def content
  load
  html = @content
  Filter.process(html)
end
97
+
98
# :call-seq:
#   toc() => ToC
#
# Returns the table of contents. Calls load first.
def toc
  load
  @toc
end
106
+
107
# :call-seq:
#   path() => filename
#
# Returns the path for this page. You can use this to link to the page from any other page.
# The path is the lower-cased base name of the page's file with the extension changed to
# ".html", and is computed once.
#
# For example, if the page name is "intro.textile" the path will be "intro.html".
def path
  return @path if @path
  basename = File.basename(@filename).downcase
  @path = basename.ext(".html")
end
116
+
117
# :call-seq:
#   id() => string
#
# Returns fragment identifier for this page: the title in lower case, with each run of
# whitespace replaced by an underscore. Computed once.
def id
  return @id if @id
  underscored = title.gsub(/\s+/, "_")
  @id = underscored.downcase
end
124
+
125
# The entries of this page's table of contents.
def entries #:nodoc:
  contents = toc
  contents.entries
end
128
+
129
# :call-seq:
#   toc_entry() => ToCEntry
#
# Returns a ToC entry for this page. The entry is created on first use and the same
# object is returned afterwards.
def toc_entry
  return @toc_entry if @toc_entry
  @toc_entry = ToCEntryForPage.new(self)
end
137
+
138
+ protected
139
+
140
# Creates the page from HTML: runs the text through ERB, then parses the result.
def create_from_html(html, options)
  expanded = erb_this(html)
  parse(expanded, options)
end
143
+
144
# Creates the page from plain text: the text is HTML-escaped and wrapped in a PRE element
# before parsing. No ERB processing is applied.
def create_from_plain(text, options)
  escaped = CGI.escapeHTML(text)
  parse(%{<pre class="text">#{escaped}</pre>}, options)
end
147
+
148
# Creates the page from Textile: converts to HTML with use_redcloth, then parses the result.
def create_from_textile(textile, options)
  html = use_redcloth(:textile, textile, options)
  parse(html, options)
end
151
+
152
# Creates the page from Markdown: converts to HTML with use_redcloth, then parses the result.
def create_from_markdown(markdown, options)
  html = use_redcloth(:markdown, markdown, options)
  parse(html, options)
end
155
+
156
+ private
157
+
158
# :call-seq:
#   use_redcloth(format, text, options)
#
# Format may be :textile or :markdown. Runs erb_this on the text first to apply ERB code,
# processes code sections ({{{ ... }}}), and converts the Textile/Markdown text to HTML.
#
# A code section may name its classes on the opening line after an exclamation mark, separated
# by commas (e.g. {{{!ruby). They become the class attribute of the PRE element.
#
# Fails with a RuntimeError if RedCloth is not loaded. This is checked on each call rather than
# when this file is loaded, so RedCloth may be required later.
def use_redcloth(format, text, options)
  fail "You need to install RedCloth first:\n gem install RedCloth" unless defined?(::RedCloth)
  text = erb_this(text)
  # Process {{{ ... }}} code sections into pre tags.
  text = text.gsub(/^\{\{\{([^\n]*)\n(.*?)\n\}\}\}/m) do
    spec_line, code = $1, $2
    # Take the capture directly: Array#to_s on the result of scan yields its inspect form on
    # current Rubies, which would end up in the class attribute.
    spec = spec_line[/^!(.*?)$/, 1].to_s.strip
    %{<notextile><pre class="#{spec.split(",").join(" ")}">#{CGI.escapeHTML(code)}</pre></notextile>}
  end
  # Create the HTML.
  RedCloth.new(text, [:no_span_caps]).to_html(format)
end
179
+
180
# :call-seq:
#   parse(html, options)
#
# Parses HTML into the content, title and ToC. This method can take an HTML document and will extract
# its body. It can deduce the title from the H1 element, TITLE element or :title option, or filename.
#
# Sets @content, @title and @toc. Raises ArgumentError if the H1/TITLE elements give more than one
# distinct title, or if an H3 header appears before any H2 header.
def parse(html, options)
  # Get the body (in most cases it's just the page). Make sure when we wreak havoc on the HTML,
  # we're not changing any content passed to us. Capture 3 is the element content (capture 2
  # holds the attributes of the BODY tag).
  body_match = regexp_element("body").match(html)
  body = body_match ? body_match[3] : html.clone

  # The correct structure is to use H1 for the document title (but TITLE element will also work).
  # If both are used, they must both match. Two or more H1 is a sign you're using H1 instead of H2.
  titles = html.scan(regexp_element("title|h1")).map { |parts| inner_text_from(parts.last) }.uniq
  raise ArgumentError, "A page can only have one title, you can use the H1 element (preferred) or TITLE element, or both if they're the same. If you want to title sections, please use the H2 element" if titles.size > 1
  # Lacking that, we need to derive the title somehow: the :title option, then the file name
  # without directory and extension, underscores turned into spaces.
  title = titles.first || options[:title] ||
    (filename && File.basename(filename, ".*").gsub("_", " ").capitalize) || "Untitled"
  # Get rid of H1 header.
  body.gsub!(regexp_element("h1"), "")

  # Catalog all the major sections, based on the H2/H3 headers.
  toc = ToC.new
  body.gsub!(regexp_element("h[23]")) do |header|
    # Hold on to the match: later regular expression calls replace the $1..$3 globals.
    match = Regexp.last_match
    tag, attributes, text = match[1].downcase, match[2].to_s, inner_text_from(match[3])
    # Make sure all H2/H3 headers have a usable ID, create one if necessary.
    id_match = regexp_attribute("id").match(attributes)
    id = CGI.unescape(id_match[3]) if id_match
    if id.nil? || id.strip.empty?
      id = CGI.unescapeHTML(text.downcase.gsub(" ", "_"))
      header = %{<#{tag} #{attributes} id="#{id}">#{text}</#{tag}>}
    end
    if tag == "h2"
      toc.add "##{id}", text
    else
      fail ArgumentError, "H3 section found without any H2 section." unless toc.last
      toc.last.add "##{id}", text
    end
    header
  end
  @content, @title, @toc = body, title, toc
end
219
+
220
+ end
221
+
222
+
223
# Filters are used to process HTML before rendering, e.g to apply syntax highlighting, URL rewriting.
# To add a new filter:
#   filter_for(:upcase) { |html| html.upcase }
module Filter

  # Registered filters, keyed by name (symbol).
  @filters = {}

  class << self

    # :call-seq:
    #   list() => names
    #
    # Return the names of all defined filters.
    def list()
      @filters.keys
    end

    # :call-seq:
    #   filter_for(name) { |html| ... }
    #
    # Defines a filter for +name+ using a block that will transform the HTML. The name is stored
    # as a symbol. Returns the Filter module. Raises ArgumentError if no block is given.
    def filter_for(name, &block)
      raise ArgumentError, "Expecting a block that transforms the HTML for filter #{name}." unless block
      @filters[name.to_sym] = block
      self
    end

    # :call-seq:
    #   process(html) => html
    #   process(html, *name) => html
    #
    # Process the HTML using the available filters and returns the resulting HTML.
    # The second form uses only the selected filters, in the order given. Names may be strings or
    # symbols, nested arrays are flattened and nil is ignored. Raises ArgumentError for a name
    # that has no filter.
    def process(html, *using)
      names = using.flatten.compact.map { |name| name.to_sym }
      unknown = names - @filters.keys
      raise ArgumentError, "No such filter: #{unknown.join(', ')}." unless unknown.empty?
      chain = names.empty? ? @filters.values : @filters.values_at(*names)
      chain.inject(html) { |result, filter| filter.call(result) }
    end

  end

end
264
+
265
+
266
# :call-seq:
#   filter_for(name) { |html| ... }
#
# Defines a filter for +name+ using a block that will transform the HTML.
# Shortcut for Filter.filter_for.
def self.filter_for(name, &block)
  Filter.filter_for(name, &block)
end

# :call-seq:
#   page(filename, options?)
#   page(format, content, options?)
#
# Creates a new Page from the arguments. With a filename the page is loaded from that file,
# with a format and content string it is created from the content in that format.
def self.page(*args)
  Page.new(*args)
end
287
+
288
+ end
@@ -0,0 +1,25 @@
1
module Docter
  # Helpers that define Rake tasks for generating and serving documentation.
  #
  # Tasks are defined through ::Rake::FileTask and ::Rake::Task directly: inside this module the
  # constant Rake refers to Docter::Rake, and the file/task shortcuts are not available on every
  # object in current versions of Rake.
  module Rake

    class << self

      # :call-seq:
      #   generate(target, collection, template, options?) => task
      #
      # Defines a file task for +target+ that depends on the collection and template dependencies.
      # When run, calls collection.generate with the template, the task name, :one_page or :all
      # (depending on the :one_page option) and the options.
      #
      # Options can be a Hash, a list of symbols (each treated as true), or both.
      def generate(target, collection, template, *args)
        options = Hash === args.last ? args.pop.clone : {}
        args.each { |arg| options[arg.to_sym] = true }
        dependencies = collection.dependencies + template.dependencies
        ::Rake::FileTask.define_task({ target=>dependencies }) do |task|
          collection.generate template, task.name, options[:one_page] ? :one_page : :all, options
        end
      end

      # :call-seq:
      #   serve(task_name, collection, template, options?) => task
      #
      # Defines a task called +task_name+ that, when run, calls collection.serve with the template
      # and options.
      #
      # Options can be a Hash, a list of symbols (each treated as true), or both.
      def serve(task_name, collection, template, *args)
        options = Hash === args.last ? args.pop.clone : {}
        args.each { |arg| options[arg.to_sym] = true }
        ::Rake::Task.define_task(task_name) do
          collection.serve template, options
        end
      end

    end

  end
end