jekyll 0.11.2 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of jekyll might be problematic. Click here for more details.

@@ -11,52 +11,284 @@ require 'yaml'
11
11
 
12
12
  module Jekyll
13
13
  module WordPress
14
- def self.process(dbname, user, pass, host = 'localhost', table_prefix = 'wp_')
15
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
14
+
15
+ # Main migrator function. Call this to perform the migration.
16
+ #
17
+ # dbname:: The name of the database
18
+ # user:: The database user name
19
+ # pass:: The database user's password
20
+ # host:: The address of the MySQL database host. Default: 'localhost'
21
+ # options:: A hash table of configuration options.
22
+ #
23
+ # Supported options are:
24
+ #
25
+ # :table_prefix:: Prefix of database tables used by WordPress.
26
+ # Default: 'wp_'
27
+ # :clean_entities:: If true, convert non-ASCII characters to HTML
28
+ # entities in the posts, comments, titles, and
29
+ # names. Requires the 'htmlentities' gem to
30
+ # work. Default: true.
31
+ # :comments:: If true, migrate post comments too. Comments
32
+ # are saved in the post's YAML front matter.
33
+ # Default: true.
34
+ # :categories:: If true, save the post's categories in its
35
+ # YAML front matter.
36
+ # :tags:: If true, save the post's tags in its
37
+ # YAML front matter.
38
+ # :more_excerpt:: If true, when a post has no excerpt but
39
+ # does have a <!-- more --> tag, use the
40
+ # preceding post content as the excerpt.
41
+ # Default: true.
42
+ # :more_anchor:: If true, convert a <!-- more --> tag into
43
+ # two HTML anchors with ids "more" and
44
+ # "more-NNN" (where NNN is the post number).
45
+ # Default: true.
46
+ # :status:: Array of allowed post statuses. Only
47
+ # posts with matching status will be migrated.
48
+ # Known statuses are :publish, :draft, :private,
49
+ # and :revision. If this is nil or an empty
50
+ # array, all posts are migrated regardless of
51
+ # status. Default: [:publish].
52
+ #
53
+ def self.process(dbname, user, pass, host='localhost', options={})
54
+ options = {
55
+ :table_prefix => 'wp_',
56
+ :clean_entities => true,
57
+ :comments => true,
58
+ :categories => true,
59
+ :tags => true,
60
+ :more_excerpt => true,
61
+ :more_anchor => true,
62
+ :status => [:publish] # :draft, :private, :revision
63
+ }.merge(options)
64
+
65
+ if options[:clean_entities]
66
+ begin
67
+ require 'htmlentities'
68
+ rescue LoadError
69
+ STDERR.puts "Could not require 'htmlentities', so the " +
70
+ ":clean_entities option is now disabled."
71
+ options[:clean_entities] = false
72
+ end
73
+ end
16
74
 
17
75
  FileUtils.mkdir_p("_posts")
18
76
 
19
- # Reads a MySQL database via Sequel and creates a post file for each
20
- # post in wp_posts that has post_status = 'publish'. This restriction is
21
- # made because 'draft' posts are not guaranteed to have valid dates.
22
- query = "SELECT post_title, \
23
- post_name, \
24
- post_date, \
25
- post_content, \
26
- post_excerpt, \
27
- ID, \
28
- guid \
29
- FROM #{table_prefix}posts \
30
- WHERE post_status = 'publish' AND \
31
- post_type = 'post'"
32
-
33
- db[query].each do |post|
34
- # Get required fields and construct Jekyll compatible name.
35
- title = post[:post_title]
36
- slug = post[:post_name]
37
- date = post[:post_date]
38
- content = post[:post_content]
39
- name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day,
40
- slug]
41
-
42
- # Get the relevant fields as a hash, delete empty fields and convert
43
- # to YAML for the header.
44
- data = {
45
- 'layout' => 'post',
46
- 'title' => title.to_s,
47
- 'excerpt' => post[:post_excerpt].to_s,
48
- 'wordpress_id' => post[:ID],
49
- 'wordpress_url' => post[:guid],
50
- 'date' => date
51
- }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
52
-
53
- # Write out the data and content to file
54
- File.open("_posts/#{name}", "w") do |f|
55
- f.puts data
56
- f.puts "---"
57
- f.puts content
77
+ db = Sequel.mysql(dbname, :user => user, :password => pass,
78
+ :host => host, :encoding => 'utf8')
79
+
80
+ px = options[:table_prefix]
81
+
82
+ posts_query = "
83
+ SELECT
84
+ posts.ID AS `id`,
85
+ posts.guid AS `guid`,
86
+ posts.post_type AS `type`,
87
+ posts.post_status AS `status`,
88
+ posts.post_title AS `title`,
89
+ posts.post_name AS `slug`,
90
+ posts.post_date AS `date`,
91
+ posts.post_content AS `content`,
92
+ posts.post_excerpt AS `excerpt`,
93
+ posts.comment_count AS `comment_count`,
94
+ users.display_name AS `author`,
95
+ users.user_login AS `author_login`,
96
+ users.user_email AS `author_email`,
97
+ users.user_url AS `author_url`
98
+ FROM #{px}posts AS `posts`
99
+ LEFT JOIN #{px}users AS `users`
100
+ ON posts.post_author = users.ID"
101
+
102
+ if options[:status] and not options[:status].empty?
103
+ status = options[:status][0]
104
+ posts_query << "
105
+ WHERE posts.post_status = '#{status.to_s}'"
106
+ options[:status][1..-1].each do |status|
107
+ posts_query << " OR
108
+ posts.post_status = '#{status.to_s}'"
109
+ end
110
+ end
111
+
112
+ db[posts_query].each do |post|
113
+ process_post(post, db, options)
114
+ end
115
+ end
116
+
117
+
118
+ def self.process_post(post, db, options)
119
+ px = options[:table_prefix]
120
+
121
+ title = post[:title]
122
+ if options[:clean_entities]
123
+ title = clean_entities(title)
124
+ end
125
+
126
+ slug = post[:slug]
127
+ if !slug or slug.empty?
128
+ slug = sluggify(title)
129
+ end
130
+
131
+ date = post[:date] || Time.now
132
+ name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month,
133
+ date.day, slug]
134
+ content = post[:content].to_s
135
+ if options[:clean_entities]
136
+ content = clean_entities(content)
137
+ end
138
+
139
+ excerpt = post[:excerpt].to_s
140
+
141
+ more_index = content.index(/<!-- *more *-->/)
142
+ more_anchor = nil
143
+ if more_index
144
+ if options[:more_excerpt] and
145
+ (post[:excerpt].nil? or post[:excerpt].empty?)
146
+ excerpt = content[0...more_index]
147
+ end
148
+ if options[:more_anchor]
149
+ more_link = "more"
150
+ content.sub!(/<!-- *more *-->/,
151
+ "<a id=\"more\"></a>" +
152
+ "<a id=\"more-#{post[:id]}\"></a>")
153
+ end
154
+ end
155
+
156
+ categories = []
157
+ tags = []
158
+
159
+ if options[:categories] or options[:tags]
160
+
161
+ cquery =
162
+ "SELECT
163
+ terms.name AS `name`,
164
+ ttax.taxonomy AS `type`
165
+ FROM
166
+ #{px}terms AS `terms`,
167
+ #{px}term_relationships AS `trels`,
168
+ #{px}term_taxonomy AS `ttax`
169
+ WHERE
170
+ trels.object_id = '#{post[:id]}' AND
171
+ trels.term_taxonomy_id = ttax.term_taxonomy_id AND
172
+ terms.term_id = ttax.term_id"
173
+
174
+ db[cquery].each do |term|
175
+ if options[:categories] and term[:type] == "category"
176
+ if options[:clean_entities]
177
+ categories << clean_entities(term[:name])
178
+ else
179
+ categories << term[:name]
180
+ end
181
+ elsif options[:tags] and term[:type] == "post_tag"
182
+ if options[:clean_entities]
183
+ tags << clean_entities(term[:name])
184
+ else
185
+ tags << term[:name]
186
+ end
187
+ end
58
188
  end
59
189
  end
190
+
191
+ comments = []
192
+
193
+ if options[:comments] and post[:comment_count].to_i > 0
194
+ cquery =
195
+ "SELECT
196
+ comment_ID AS `id`,
197
+ comment_author AS `author`,
198
+ comment_author_email AS `author_email`,
199
+ comment_author_url AS `author_url`,
200
+ comment_date AS `date`,
201
+ comment_date_gmt AS `date_gmt`,
202
+ comment_content AS `content`
203
+ FROM #{px}comments
204
+ WHERE
205
+ comment_post_ID = '#{post[:id]}' AND
206
+ comment_approved != 'spam'"
207
+
208
+
209
+ db[cquery].each do |comment|
210
+
211
+ comcontent = comment[:content].to_s
212
+ if comcontent.respond_to?(:force_encoding)
213
+ comcontent.force_encoding("UTF-8")
214
+ end
215
+ if options[:clean_entities]
216
+ comcontent = clean_entities(comcontent)
217
+ end
218
+ comauthor = comment[:author].to_s
219
+ if options[:clean_entities]
220
+ comauthor = clean_entities(comauthor)
221
+ end
222
+
223
+ comments << {
224
+ 'id' => comment[:id].to_i,
225
+ 'author' => comauthor,
226
+ 'author_email' => comment[:author_email].to_s,
227
+ 'author_url' => comment[:author_url].to_s,
228
+ 'date' => comment[:date].to_s,
229
+ 'date_gmt' => comment[:date_gmt].to_s,
230
+ 'content' => comcontent,
231
+ }
232
+ end
233
+
234
+ comments.sort!{ |a,b| a['id'] <=> b['id'] }
235
+ end
236
+
237
+ # Get the relevant fields as a hash, delete empty fields and
238
+ # convert to YAML for the header.
239
+ data = {
240
+ 'layout' => post[:type].to_s,
241
+ 'status' => post[:status].to_s,
242
+ 'published' => (post[:status].to_s == "publish"),
243
+ 'title' => title.to_s,
244
+ 'author' => post[:author].to_s,
245
+ 'author_login' => post[:author_login].to_s,
246
+ 'author_email' => post[:author_email].to_s,
247
+ 'author_url' => post[:author_url].to_s,
248
+ 'excerpt' => excerpt,
249
+ 'more_anchor' => more_anchor,
250
+ 'wordpress_id' => post[:id],
251
+ 'wordpress_url' => post[:guid].to_s,
252
+ 'date' => date,
253
+ 'categories' => options[:categories] ? categories : nil,
254
+ 'tags' => options[:tags] ? tags : nil,
255
+ 'comments' => options[:comments] ? comments : nil,
256
+ }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
257
+
258
+ # Write out the data and content to file
259
+ File.open("_posts/#{name}", "w") do |f|
260
+ f.puts data
261
+ f.puts "---"
262
+ f.puts content
263
+ end
264
+ end
265
+
266
+
267
+ def self.clean_entities( text )
268
+ if text.respond_to?(:force_encoding)
269
+ text.force_encoding("UTF-8")
270
+ end
271
+ text = HTMLEntities.new.encode(text, :named)
272
+ # We don't want to convert these, it would break all
273
+ # HTML tags in the post and comments.
274
+ text.gsub!("&amp;", "&")
275
+ text.gsub!("&lt;", "<")
276
+ text.gsub!("&gt;", ">")
277
+ text.gsub!("&quot;", '"')
278
+ text.gsub!("&apos;", "'")
279
+ text
280
+ end
281
+
282
+
283
+ def self.sluggify( title )
284
+ begin
285
+ require 'unidecode'
286
+ title = title.to_ascii
287
+ rescue LoadError
288
+ STDERR.puts "Could not require 'unidecode'. If your post titles have non-ASCII characters, you could get nicer permalinks by installing unidecode."
289
+ end
290
+ title.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip.gsub(" ", "-")
60
291
  end
292
+
61
293
  end
62
294
  end
@@ -22,6 +22,8 @@ module Jekyll
22
22
  attr_accessor :data, :content, :output, :ext
23
23
  attr_accessor :date, :slug, :published, :tags, :categories
24
24
 
25
+ attr_reader :name
26
+
25
27
  # Initialize this Post instance.
26
28
  # +site+ is the Site
27
29
  # +base+ is the String path to the dir containing the post file
@@ -36,7 +38,11 @@ module Jekyll
36
38
 
37
39
  self.categories = dir.split('/').reject { |x| x.empty? }
38
40
  self.process(name)
39
- self.read_yaml(@base, name)
41
+ begin
42
+ self.read_yaml(@base, name)
43
+ rescue Exception => msg
44
+ raise FatalException.new("#{msg} in #{@base}/#{name}")
45
+ end
40
46
 
41
47
  #If we've added a date and time to the yaml, use that instead of the filename date
42
48
  #Means we'll sort correctly.
@@ -131,7 +137,7 @@ module Jekyll
131
137
  "title" => CGI.escape(slug),
132
138
  "i_day" => date.strftime("%d").to_i.to_s,
133
139
  "i_month" => date.strftime("%m").to_i.to_s,
134
- "categories" => categories.join('/'),
140
+ "categories" => categories.map { |c| URI.escape(c) }.join('/'),
135
141
  "output_ext" => self.output_ext
136
142
  }.inject(template) { |result, token|
137
143
  result.gsub(/:#{Regexp.escape token.first}/, token.last)
@@ -4,7 +4,7 @@ module Jekyll
4
4
 
5
5
  class Site
6
6
  attr_accessor :config, :layouts, :posts, :pages, :static_files,
7
- :categories, :exclude, :source, :dest, :lsi, :pygments,
7
+ :categories, :exclude, :include, :source, :dest, :lsi, :pygments,
8
8
  :permalink_style, :tags, :time, :future, :safe, :plugins, :limit_posts
9
9
 
10
10
  attr_accessor :converters, :generators
@@ -18,11 +18,12 @@ module Jekyll
18
18
  self.safe = config['safe']
19
19
  self.source = File.expand_path(config['source'])
20
20
  self.dest = File.expand_path(config['destination'])
21
- self.plugins = File.expand_path(config['plugins'])
21
+ self.plugins = Array(config['plugins']).map { |d| File.expand_path(d) }
22
22
  self.lsi = config['lsi']
23
23
  self.pygments = config['pygments']
24
24
  self.permalink_style = config['permalink'].to_sym
25
25
  self.exclude = config['exclude'] || []
26
+ self.include = config['include'] || []
26
27
  self.future = config['future']
27
28
  self.limit_posts = config['limit_posts'] || nil
28
29
 
@@ -72,8 +73,10 @@ module Jekyll
72
73
  # If safe mode is off, load in any Ruby files under the plugins
73
74
  # directory.
74
75
  unless self.safe
75
- Dir[File.join(self.plugins, "**/*.rb")].each do |f|
76
- require f
76
+ self.plugins.each do |plugins|
77
+ Dir[File.join(plugins, "**/*.rb")].each do |f|
78
+ require f
79
+ end
77
80
  end
78
81
  end
79
82
 
@@ -98,12 +101,12 @@ module Jekyll
98
101
  self.read_directories
99
102
  end
100
103
 
101
- # Read all the files in <source>/<dir>/_layouts and create a new Layout
102
- # object with each one.
104
+ # Read all the files in <source>/<layouts> and create a new Layout object
105
+ # with each one.
103
106
  #
104
107
  # Returns nothing.
105
- def read_layouts(dir = '')
106
- base = File.join(self.source, dir, "_layouts")
108
+ def read_layouts
109
+ base = File.join(self.source, self.config['layouts'])
107
110
  return unless File.exists?(base)
108
111
  entries = []
109
112
  Dir.chdir(base) { entries = filter_entries(Dir['*.*']) }
@@ -118,12 +121,12 @@ module Jekyll
118
121
  # that will become part of the site according to the rules in
119
122
  # filter_entries.
120
123
  #
121
- # dir - The String relative path of the directory to read.
124
+ # dir - The String relative path of the directory to read. Default: ''.
122
125
  #
123
126
  # Returns nothing.
124
127
  def read_directories(dir = '')
125
128
  base = File.join(self.source, dir)
126
- entries = Dir.chdir(base) { filter_entries(Dir['*']) }
129
+ entries = Dir.chdir(base) { filter_entries(Dir.entries('.')) }
127
130
 
128
131
  self.read_posts(dir)
129
132
 
@@ -173,7 +176,10 @@ module Jekyll
173
176
  self.posts.sort!
174
177
 
175
178
  # limit the posts if :limit_posts option is set
176
- self.posts = self.posts[-limit_posts, limit_posts] if limit_posts
179
+ if limit_posts
180
+ limit = self.posts.length < limit_posts ? self.posts.length : limit_posts
181
+ self.posts = self.posts[-limit, limit]
182
+ end
177
183
  end
178
184
 
179
185
  # Run each of the Generators.
@@ -189,12 +195,13 @@ module Jekyll
189
195
  #
190
196
  # Returns nothing.
191
197
  def render
198
+ payload = site_payload
192
199
  self.posts.each do |post|
193
- post.render(self.layouts, site_payload)
200
+ post.render(self.layouts, payload)
194
201
  end
195
202
 
196
203
  self.pages.each do |page|
197
- page.render(self.layouts, site_payload)
204
+ page.render(self.layouts, payload)
198
205
  end
199
206
 
200
207
  self.categories.values.map { |ps| ps.sort! { |a, b| b <=> a } }
@@ -250,7 +257,7 @@ module Jekyll
250
257
  end
251
258
  end
252
259
 
253
- # Constructs a Hash of Posts indexed by the specified Post attribute.
260
+ # Construct a Hash of Posts indexed by the specified Post attribute.
254
261
  #
255
262
  # post_attr - The String name of the Post attribute.
256
263
  #
@@ -300,12 +307,12 @@ module Jekyll
300
307
  # or are excluded in the site configuration, unless they are web server
301
308
  # files such as '.htaccess'.
302
309
  #
303
- # entries - The Array of file/directory entries to filter.
310
+ # entries - The Array of String file/directory entries to filter.
304
311
  #
305
312
  # Returns the Array of filtered entries.
306
313
  def filter_entries(entries)
307
314
  entries = entries.reject do |e|
308
- unless ['.htaccess'].include?(e)
315
+ unless self.include.include?(e)
309
316
  ['.', '_', '#'].include?(e[0..0]) ||
310
317
  e[-1..-1] == '~' ||
311
318
  self.exclude.include?(e) ||