sutty-migration 0.2.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +50 -0
- data/lib/sutty_migration/data.rb +38 -30
- data/lib/sutty_migration/jekyll/document_creator.rb +66 -22
- data/lib/sutty_migration/wordpress.rb +68 -20
- data/lib/sutty_migration/wordpress_xml/attachment.rb +69 -0
- data/lib/sutty_migration/wordpress_xml/post.rb +171 -0
- data/lib/sutty_migration/wordpress_xml.rb +154 -0
- metadata +47 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8de345679f1c94e0bda19ea99e7d454924bf5aa86c0281bc6a61feab84a3a1cd
|
4
|
+
data.tar.gz: 5c62c2f467dafb6bc822dbe7c334280776ee173fea3a045cf90cc401030558b2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f1a13ba1cb0c5e75ffc2aae11225b83cc8ee426ccc2c5b3f5e6a8ea6d52df0dee569f67cb4d1319a8f635279edf07604b05ac2ecbc3f66797b046bfe66a14495
|
7
|
+
data.tar.gz: 2d0fc815a65e19ff74bac83298e71f288b5daf5bd2ab141028eaec6a048e1a27903a9e804638b29cf8b6f13302077f161ff7f253d3e0d3b75b561840c59c54d6
|
data/README.md
CHANGED
@@ -23,6 +23,7 @@ Add the plugin to your `_config.yml`:
|
|
23
23
|
```yaml
|
24
24
|
plugins:
|
25
25
|
- sutty-migration
|
26
|
+
array_separator: ','
|
26
27
|
```
|
27
28
|
|
28
29
|
Compile a CSV file with the following required fields:
|
@@ -114,6 +115,55 @@ Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
|
|
114
115
|
end
|
115
116
|
```
|
116
117
|
|
118
|
+
### WordPress XML
|
119
|
+
|
120
|
+
If you have the XML dump from a WordPress site, you can migrate content
|
121
|
+
by writing a migration plugin.
|
122
|
+
|
123
|
+
```ruby
|
124
|
+
# frozen_string_literal: true
|
125
|
+
|
126
|
+
require 'sutty_migration/jekyll/document_creator'
|
127
|
+
require 'sutty_migration/wordpress_xml'
|
128
|
+
require 'jekyll-write-and-commit-changes'
|
129
|
+
require 'securerandom'
|
130
|
+
|
131
|
+
# Run after reading the site
|
132
|
+
Jekyll::Hooks.register :site, :post_read do |site|
|
133
|
+
# Put the XML dump at _files/wordpress.xml
|
134
|
+
xml = SuttyMigration::WordpressXml.new site: site, file: '_files/wordpress.xml'
|
135
|
+
|
136
|
+
# Download all files
|
137
|
+
xml.attachments.values.map(&:download)
|
138
|
+
|
139
|
+
# Migrate posts. You can move metadata around and recover
|
140
|
+
# relationships or any info your theme requires.
|
141
|
+
xml.posts.values.each do |post|
|
142
|
+
# Update documents already migrated.
|
143
|
+
doc = Jekyll::Document.find_or_create(site: site, collection: locale, title: post.title, slug: post.slug, date: post.date)
|
144
|
+
# Don't change the UUIDv4
|
145
|
+
doc.data['uuid'] ||= SecureRandom.uuid
|
146
|
+
doc.data['draft'] = post.draft?
|
147
|
+
doc.data['layout'] = 'post'
|
148
|
+
doc.data['last_modified_at'] = post.last_modified_at
|
149
|
+
|
150
|
+
doc.data['categories'] = post.categories.map { |c| c[:title] }
|
151
|
+
doc.data['tags'] = post.tags.map { |t| t[:title] }
|
152
|
+
|
153
|
+
doc.data['author'] = post.author[:email]
|
154
|
+
doc.data['description'] = post.description
|
155
|
+
doc.content = post.content
|
156
|
+
|
157
|
+
doc.save
|
158
|
+
rescue => e
|
159
|
+
Jekyll.logger.warn "Couldn't migrate #{post.title}"
|
160
|
+
end
|
161
|
+
|
162
|
+
exit # Stop here
|
163
|
+
end
|
164
|
+
```
|
165
|
+
|
166
|
+
|
117
167
|
## Contributing
|
118
168
|
|
119
169
|
Bug reports and pull requests are welcome on 0xacab.org at
|
data/lib/sutty_migration/data.rb
CHANGED
@@ -13,6 +13,8 @@ require_relative 'jekyll/document_creator'
|
|
13
13
|
Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
|
14
14
|
documents = site.documents
|
15
15
|
|
16
|
+
array_separator = site.config.fetch('array_separator', ',')
|
17
|
+
|
16
18
|
site.data['layouts']&.each do |name, layout|
|
17
19
|
site.data.dig('migration', name)&.each do |row|
|
18
20
|
row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
|
@@ -24,41 +26,47 @@ Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
|
|
24
26
|
end
|
25
27
|
end
|
26
28
|
|
27
|
-
document ||=
|
29
|
+
document ||= begin
|
30
|
+
data = row.slice(*%w[date slug title]).transform_keys(&:to_sym)
|
31
|
+
Jekyll::Document.find_or_create(site: site, collection: 'posts', **data)
|
32
|
+
end
|
33
|
+
next unless document
|
28
34
|
|
29
35
|
row.each do |attribute, value|
|
30
|
-
next
|
36
|
+
next if value.nil? || value.blank?
|
37
|
+
|
38
|
+
value.strip! if value.is_a? String
|
31
39
|
|
32
40
|
row[attribute] =
|
33
41
|
case layout.dig(attribute, 'type')
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
42
|
+
when 'string' then value.tr("\n", ' ').squeeze(' ')
|
43
|
+
when 'text' then value.gsub("\n", "\n\n")
|
44
|
+
when 'tel' then value.tr("\n", ' ').squeeze(' ')
|
45
|
+
# TODO: validate
|
46
|
+
when 'color' then value.tr("\n", ' ').squeeze(' ')
|
47
|
+
when 'date' then Jekyll::Utils.parse_date(value)
|
48
|
+
# TODO: validate
|
49
|
+
when 'email' then value.tr("\n", ' ').squeeze(' ')
|
50
|
+
# TODO: validate
|
51
|
+
when 'url' then value.tr("\n", ' ').squeeze(' ')
|
52
|
+
when 'content' then value.gsub("\n", "\n\n")
|
53
|
+
when 'markdown_content' then value.gsub("\n", "\n\n")
|
54
|
+
when 'markdown' then value.gsub("\n", "\n\n")
|
55
|
+
when 'number' then value.to_i
|
56
|
+
when 'order' then value.to_i
|
57
|
+
when 'boolean' then !value.strip.empty?
|
58
|
+
when 'array' then value.split(array_separator).map(&:strip)
|
59
|
+
# TODO: process values from the default array
|
60
|
+
when 'predefined_array' then value.split(array_separator).map(&:strip)
|
61
|
+
when 'image' then { 'path' => value, 'description' => '' }
|
62
|
+
when 'file' then { 'path' => value, 'description' => '' }
|
63
|
+
when 'geo' then %w[lat lng].zip(value.split(array_separator, 2).map(&:to_f)).to_h
|
64
|
+
when 'belongs_to' then value
|
65
|
+
when 'has_many' then value.split(array_separator).map(&:strip)
|
66
|
+
when 'has_and_belongs_to_many' then value.split(array_separator).map(&:strip)
|
67
|
+
when 'related_posts' then value.split(array_separator).map(&:strip)
|
68
|
+
when 'locales' then value.split(array_separator).map(&:strip)
|
69
|
+
else value
|
62
70
|
end
|
63
71
|
end
|
64
72
|
|
data/lib/sutty_migration/jekyll/document_creator.rb
CHANGED
@@ -7,31 +7,75 @@ module SuttyMigration
|
|
7
7
|
module Jekyll
|
8
8
|
module DocumentCreator
|
9
9
|
class DocumentExists < ArgumentError; end
|
10
|
+
|
10
11
|
def self.included(base)
|
11
12
|
base.class_eval do
|
13
|
+
class << self
|
14
|
+
# Creates a new document in a collection or fails if it already
|
15
|
+
# exists.
|
16
|
+
#
|
17
|
+
# @param :site [Jekyll::Site] Jekyll site
|
18
|
+
# @param :date [Time] Post date
|
19
|
+
# @param :title [String] Post title
|
20
|
+
# @param :slug [String] Post slug, slugified title if empty
|
21
|
+
# @param :collection [Jekyll::Collection,String] Collection label or collection
|
22
|
+
# @return [Jekyll::Document] A new document
|
23
|
+
def create(site:, date:, title:, collection:, slug: nil)
|
24
|
+
collection = site.collections[collection] if collection.is_a? String
|
25
|
+
slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
|
26
|
+
basename = "#{date.strftime('%F')}-#{slug}.markdown"
|
27
|
+
path = File.join(collection.directory, basename)
|
12
28
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
29
|
+
raise DocumentExists, "#{path} already exists" if File.exist? path
|
30
|
+
|
31
|
+
::Jekyll::Document.new(path, site: site, collection: collection).tap do |document|
|
32
|
+
collection.docs << document
|
33
|
+
document.data['title'] = title
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# Finds a document by its relative path or creates it if it
|
38
|
+
# doesn't exist. Helpful for idempotent migrations (create or
|
39
|
+
# update actions)
|
40
|
+
#
|
41
|
+
# @param :site [Jekyll::Site] Jekyll site
|
42
|
+
# @param :date [Time] Post date
|
43
|
+
# @param :title [String] Post title
|
44
|
+
# @param :slug [String] Post slug, slugified title if empty
|
45
|
+
# @param :collection [Jekyll::Collection,String] Collection label or collection
|
46
|
+
# @return [Jekyll::Document] The found document or a new one
|
47
|
+
def find_or_create(site:, date:, title:, collection:, slug: nil)
|
48
|
+
collection = site.collections[collection] if collection.is_a? String
|
49
|
+
slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
|
50
|
+
basename = "#{date.strftime('%F')}-#{slug}.markdown"
|
51
|
+
path = File.join(collection.relative_directory, basename)
|
52
|
+
|
53
|
+
return find(site: site, relative_path: path) if File.exist?(path)
|
54
|
+
|
55
|
+
create(site: site, date: date, title: title, slug: slug, collection: collection)
|
56
|
+
end
|
57
|
+
|
58
|
+
# Finds a document by its relative path
|
59
|
+
#
|
60
|
+
# @param :site [Jekyll::Site]
|
61
|
+
# @param :relative_path [String]
|
62
|
+
# @return [Jekyll::Document,Nil]
|
63
|
+
def find(site:, relative_path:)
|
64
|
+
indexed_documents_by_relative_path(site)[relative_path]
|
65
|
+
end
|
66
|
+
|
67
|
+
# Index documents by relative path for faster finding
|
68
|
+
#
|
69
|
+
# @param [Jekyll::Site]
|
70
|
+
# @return [Hash]
|
71
|
+
def indexed_documents_by_relative_path(site)
|
72
|
+
@indexed_documents_by_relative_path ||= site.documents.reduce({}) do |idx, doc|
|
73
|
+
idx.tap do |i|
|
74
|
+
i[doc.relative_path] = doc
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
35
79
|
end
|
36
80
|
end
|
37
81
|
end
|
data/lib/sutty_migration/wordpress.rb
CHANGED
@@ -43,7 +43,9 @@ module SuttyMigration
|
|
43
43
|
pfx = "#{prefix}#{blog[:blog_id]}_" if blog[:blog_id] > 1
|
44
44
|
pfx ||= prefix
|
45
45
|
|
46
|
-
[
|
46
|
+
[blog[:blog_id],
|
47
|
+
blog.merge(db: self.class.new(site: site, url: url, prefix: pfx, database: database, limit: limit,
|
48
|
+
multisite: self))]
|
47
49
|
end.to_h
|
48
50
|
end
|
49
51
|
|
@@ -94,11 +96,11 @@ module SuttyMigration
|
|
94
96
|
end
|
95
97
|
|
96
98
|
Faraday.get(url) do |req|
|
97
|
-
req.options.on_data =
|
99
|
+
req.options.on_data = proc do |chunk, downloaded_bytes|
|
98
100
|
f.write chunk
|
99
101
|
|
100
102
|
if progress
|
101
|
-
progress.progress =
|
103
|
+
progress.progress = downloaded_bytes > content_length ? content_length : downloaded_bytes
|
102
104
|
end
|
103
105
|
end
|
104
106
|
end
|
@@ -130,10 +132,31 @@ module SuttyMigration
|
|
130
132
|
p.map do |post|
|
131
133
|
# Sequel parses dates on localtime
|
132
134
|
post[:date] = ::Jekyll::Utils.parse_date(post[:date]) unless post[:date].blank?
|
133
|
-
|
135
|
+
unless post[:last_modified_at].blank?
|
136
|
+
post[:last_modified_at] =
|
137
|
+
::Jekyll::Utils.parse_date(post[:last_modified_at])
|
138
|
+
end
|
134
139
|
|
135
|
-
post[:front_matter] =
|
136
|
-
|
140
|
+
post[:front_matter] =
|
141
|
+
begin
|
142
|
+
unless post[:front_matter].blank?
|
143
|
+
JSON.parse(post[:front_matter]).transform_keys(&:to_sym).transform_values do |v|
|
144
|
+
v.size == 1 ? v.first : v
|
145
|
+
end
|
146
|
+
end
|
147
|
+
rescue JSON::ParserError
|
148
|
+
{}
|
149
|
+
end
|
150
|
+
post[:terms] =
|
151
|
+
begin
|
152
|
+
unless post[:terms].blank?
|
153
|
+
JSON.parse(post[:terms]).transform_keys(&:to_sym).transform_values do |v|
|
154
|
+
v.size == 1 ? v.first : v
|
155
|
+
end
|
156
|
+
end
|
157
|
+
rescue JSON::ParserError
|
158
|
+
{}
|
159
|
+
end
|
137
160
|
end
|
138
161
|
end
|
139
162
|
end
|
@@ -167,7 +190,7 @@ module SuttyMigration
|
|
167
190
|
<<~EOQ
|
168
191
|
select
|
169
192
|
u.*
|
170
|
-
#{
|
193
|
+
#{', json_group_object(m.meta_key, m.meta_value) as meta' if with_meta}
|
171
194
|
from #{pfx}users as u
|
172
195
|
#{"left join #{pfx}usermeta as m on m.user_id = u.id" if with_meta}
|
173
196
|
group by u.id
|
@@ -203,31 +226,56 @@ module SuttyMigration
|
|
203
226
|
p.menu_order as menu_order,
|
204
227
|
p.post_mime_type as mime_type,
|
205
228
|
p.comment_count as comment_count
|
206
|
-
#{
|
207
|
-
#{
|
229
|
+
#{', f.front_matter as front_matter' if with_meta}
|
230
|
+
#{', t.terms as terms' if with_meta}
|
208
231
|
from #{prefix}posts as p
|
209
|
-
left join #{
|
210
|
-
#{"left join (#{terms_query
|
232
|
+
#{"left join (#{meta_query}) as f on f.post_id = p.ID" if with_meta}
|
233
|
+
#{"left join (#{terms_query}) as t on t.post_id = p.ID" if with_meta}
|
211
234
|
#{"where p.post_type = '#{layout}'" if layout}
|
212
235
|
group by p.ID
|
213
236
|
EOQ
|
214
237
|
end
|
215
238
|
|
239
|
+
# Recover the post meta as a JSON object with multiple values
|
240
|
+
# converted to arrays
|
241
|
+
#
|
242
|
+
# @return [String]
|
243
|
+
def meta_query
|
244
|
+
<<~EOQ
|
245
|
+
select
|
246
|
+
post_id,
|
247
|
+
json_group_object(meta_key, json(meta_values)) as front_matter
|
248
|
+
from (
|
249
|
+
select
|
250
|
+
post_id,
|
251
|
+
meta_key,
|
252
|
+
json_group_array(meta_value) as meta_values
|
253
|
+
from #{prefix}postmeta
|
254
|
+
group by post_id, meta_key
|
255
|
+
)
|
256
|
+
group by post_id
|
257
|
+
EOQ
|
258
|
+
end
|
259
|
+
|
216
260
|
# Term taxonomy query
|
217
261
|
#
|
218
262
|
# @param :layout [String] Layout name
|
219
263
|
# @return [String]
|
220
|
-
def terms_query
|
264
|
+
def terms_query
|
221
265
|
<<~EOQ
|
222
266
|
select
|
223
|
-
|
224
|
-
json_group_object(
|
225
|
-
from
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
267
|
+
post_id,
|
268
|
+
json_group_object(taxonomy, json(terms)) as terms
|
269
|
+
from (
|
270
|
+
select
|
271
|
+
r.object_id as post_id,
|
272
|
+
tt.taxonomy,
|
273
|
+
json_group_array(t.name) as terms
|
274
|
+
from #{prefix}term_relationships as r
|
275
|
+
left join #{prefix}term_taxonomy as tt on tt.term_taxonomy_id = r.term_taxonomy_id
|
276
|
+
left join #{prefix}terms as t on t.term_id = tt.term_id
|
277
|
+
group by r.object_id)
|
278
|
+
group by post_id
|
231
279
|
EOQ
|
232
280
|
end
|
233
281
|
end
|
data/lib/sutty_migration/wordpress_xml/attachment.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'post'
|
4
|
+
require 'php-serialize'
|
5
|
+
require 'faraday'
|
6
|
+
require 'progressbar'
|
7
|
+
|
8
|
+
module SuttyMigration
|
9
|
+
class WordpressXml
|
10
|
+
# Represents an attachment or uploaded file.
|
11
|
+
class Attachment < Post
|
12
|
+
# File URL
|
13
|
+
#
|
14
|
+
# @return [String]
|
15
|
+
def attachment_url
|
16
|
+
@attachment_url ||= attribute_value 'attachment_url'
|
17
|
+
end
|
18
|
+
|
19
|
+
# File destination
|
20
|
+
#
|
21
|
+
# @return [String]
|
22
|
+
def dest
|
23
|
+
@dest ||= URI(attachment_url).path.sub(%r{\A/}, '')
|
24
|
+
end
|
25
|
+
|
26
|
+
# Metadata, with file information as a Hash
|
27
|
+
#
|
28
|
+
# @return [Hash]
|
29
|
+
def meta
|
30
|
+
super.tap do |m|
|
31
|
+
m['_wp_attachment_metadata'] = PHP.unserialize m['_wp_attachment_metadata']
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# Download the file if it doesn't exist. Optionally show a
|
36
|
+
# progress bar.
|
37
|
+
#
|
38
|
+
# @param :progress [Boolean]
|
39
|
+
# @return [Boolean]
|
40
|
+
def download(progress: true)
|
41
|
+
return true if File.exist? dest
|
42
|
+
|
43
|
+
::Jekyll.logger.info "Downloading #{dest}"
|
44
|
+
|
45
|
+
FileUtils.mkdir_p File.dirname(dest)
|
46
|
+
|
47
|
+
File.open(dest, 'w') do |f|
|
48
|
+
if progress
|
49
|
+
head = Faraday.head(attachment_url)
|
50
|
+
content_length = head.headers['content-length'].to_i
|
51
|
+
progress = ProgressBar.create(title: File.basename(dest), total: content_length, output: $stderr)
|
52
|
+
end
|
53
|
+
|
54
|
+
Faraday.get(attachment_url) do |req|
|
55
|
+
req.options.on_data = proc do |chunk, downloaded_bytes|
|
56
|
+
f.write chunk
|
57
|
+
|
58
|
+
if progress
|
59
|
+
progress.progress = downloaded_bytes > content_length ? content_length : downloaded_bytes
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
File.exist? dest
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
data/lib/sutty_migration/wordpress_xml/post.rb
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'wordpress_formatting/wpautop'
|
4
|
+
require 'jekyll/utils'
|
5
|
+
|
6
|
+
module SuttyMigration
|
7
|
+
class WordpressXml
|
8
|
+
# Represents a WordPress post
|
9
|
+
class Post
|
10
|
+
attr_reader :wordpress, :item
|
11
|
+
|
12
|
+
# @param :wordpress [SuttyMigration::WordpressXml]
|
13
|
+
# @param :item [Nokogiri::XML::Element]
|
14
|
+
def initialize(wordpress:, item:)
|
15
|
+
@wordpress = wordpress
|
16
|
+
@item = item
|
17
|
+
end
|
18
|
+
|
19
|
+
def inspect
|
20
|
+
"#<SuttyMigration::WordpressXml::Post title=\"#{title}\">"
|
21
|
+
end
|
22
|
+
|
23
|
+
# Post ID
|
24
|
+
#
|
25
|
+
# @return [Integer]
|
26
|
+
def id
|
27
|
+
@id ||= attribute_value('post_id').to_i
|
28
|
+
end
|
29
|
+
|
30
|
+
# Permalink. Absolute URL to the post.
|
31
|
+
#
|
32
|
+
# @return [String]
|
33
|
+
def permalink
|
34
|
+
@permalink ||= attribute_value('link').sub(wordpress.url, '')
|
35
|
+
end
|
36
|
+
|
37
|
+
# Title
|
38
|
+
#
|
39
|
+
# @return [String]
|
40
|
+
def title
|
41
|
+
@title ||= attribute_value('title')
|
42
|
+
end
|
43
|
+
|
44
|
+
# Description
|
45
|
+
#
|
46
|
+
# @return [String]
|
47
|
+
def description
|
48
|
+
@description ||= attribute_value('description')
|
49
|
+
end
|
50
|
+
|
51
|
+
# Slug ("post name")
|
52
|
+
#
|
53
|
+
# @return [String]
|
54
|
+
def slug
|
55
|
+
@slug ||= attribute_value('post_name')
|
56
|
+
end
|
57
|
+
|
58
|
+
# Publication date.
|
59
|
+
#
|
60
|
+
# WordPress can store this date in three different fields and
|
61
|
+
# sometimes they come empty or invalid.
|
62
|
+
#
|
63
|
+
# @return [Time]
|
64
|
+
def date
|
65
|
+
@date ||= %w[pubDate post_date_gmt post_date].map do |date_attr|
|
66
|
+
::Jekyll::Utils.parse_date attribute_value(date_attr)
|
67
|
+
rescue StandardError
|
68
|
+
end.compact.first
|
69
|
+
end
|
70
|
+
|
71
|
+
# Modification date.
|
72
|
+
#
|
73
|
+
# @return [Time]
|
74
|
+
def last_modified_at
|
75
|
+
@last_modified_at ||= ::Jekyll::Utils.parse_date attribute_value('post_modified_gmt')
|
76
|
+
end
|
77
|
+
|
78
|
+
# Content as HTML, with site URL removed.
|
79
|
+
#
|
80
|
+
# @return [String]
|
81
|
+
def content
|
82
|
+
@content ||= WordpressFormatting::Wpautop.wpautop(attribute_value('encoded')).gsub(
|
83
|
+
/ (href|src)="#{wordpress.url}/, ' \\1="'
|
84
|
+
)
|
85
|
+
end
|
86
|
+
|
87
|
+
# Author attributes.
|
88
|
+
#
|
89
|
+
# @return [Hash]
|
90
|
+
def author
|
91
|
+
@author ||= wordpress.authors[attribute_value('creator')]
|
92
|
+
end
|
93
|
+
|
94
|
+
# Post password. Use with jekyll-crypto.
|
95
|
+
#
|
96
|
+
# @return [String]
|
97
|
+
def password
|
98
|
+
@password ||= attribute_value 'post_password'
|
99
|
+
end
|
100
|
+
|
101
|
+
# Tags with attributes.
|
102
|
+
#
|
103
|
+
# @return [Hash]
|
104
|
+
def tags
|
105
|
+
@tags ||= item.css('category').select do |c|
|
106
|
+
c[:domain] == 'post_tag'
|
107
|
+
end.map do |c|
|
108
|
+
wordpress.tags[c[:nicename]]
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
# Categories with attributes.
|
113
|
+
#
|
114
|
+
# @return [Hash]
|
115
|
+
def categories
|
116
|
+
@categories ||= item.css('category').select do |c|
|
117
|
+
c[:domain] == 'category'
|
118
|
+
end.map do |c|
|
119
|
+
wordpress.categories[c[:nicename]]
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# Metadata. Plugins store useful information here. Duplicated
|
124
|
+
# keys are returned as an Array of values.
|
125
|
+
#
|
126
|
+
# @return [Hash]
|
127
|
+
def meta
|
128
|
+
@meta ||= {}.tap do |meta|
|
129
|
+
item.css('postmeta').each do |m|
|
130
|
+
key = m.css('meta_key').text
|
131
|
+
value = m.css('meta_value').text
|
132
|
+
|
133
|
+
case meta[key]
|
134
|
+
when nil then meta[key] = value
|
135
|
+
when String then meta[key] = [meta[key], value]
|
136
|
+
when Array then meta[key] << value
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
# Order. Higher are sorted on top by jekyll-order.
|
143
|
+
#
|
144
|
+
# @return [Integer]
|
145
|
+
def order
|
146
|
+
@order ||= attribute_value 'is_sticky'
|
147
|
+
end
|
148
|
+
|
149
|
+
# Publication status
|
150
|
+
#
|
151
|
+
# @return [Boolean]
|
152
|
+
def published?
|
153
|
+
@published ||= attribute_value('status') == 'publish'
|
154
|
+
end
|
155
|
+
|
156
|
+
# Publication status
|
157
|
+
#
|
158
|
+
# @return [Boolean]
|
159
|
+
def draft?
|
160
|
+
@draft ||= attribute_value('status') == 'draft'
|
161
|
+
end
|
162
|
+
|
163
|
+
# Get a value from the attribute
|
164
|
+
#
|
165
|
+
# @return [String]
|
166
|
+
def attribute_value(key)
|
167
|
+
item.at_css(key).text
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
data/lib/sutty_migration/wordpress_xml.rb
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require_relative 'wordpress_xml/post'
|
5
|
+
require_relative 'wordpress_xml/attachment'
|
6
|
+
|
7
|
+
module SuttyMigration
|
8
|
+
# Understands the XML dump generated by Wordpress and creates
|
9
|
+
# Jekyll::Documents
|
10
|
+
class WordpressXml
|
11
|
+
attr_reader :site, :file, :xml
|
12
|
+
|
13
|
+
# @param :site [Jekyll::Site] Jekyll site
|
14
|
+
# @param :file [String] File path
|
15
|
+
def initialize(site:, file:)
|
16
|
+
@site = site
|
17
|
+
@file = file
|
18
|
+
@xml = Nokogiri::XML File.read(file)
|
19
|
+
|
20
|
+
# Make things easier by removing namespaces.
|
21
|
+
xml.remove_namespaces!
|
22
|
+
end
|
23
|
+
|
24
|
+
def inspect
|
25
|
+
'#<SuttyMigration::WordpressXml>'
|
26
|
+
end
|
27
|
+
|
28
|
+
# Site URL
|
29
|
+
#
|
30
|
+
# @return [String]
|
31
|
+
def url
|
32
|
+
@url ||= attribute_value(xml, 'channel > link')
|
33
|
+
end
|
34
|
+
|
35
|
+
# Site title
|
36
|
+
#
|
37
|
+
# @return [String]
|
38
|
+
def title
|
39
|
+
@title ||= attribute_value(xml, 'channel > title')
|
40
|
+
end
|
41
|
+
|
42
|
+
# Description
|
43
|
+
#
|
44
|
+
# @return [String]
|
45
|
+
def description
|
46
|
+
@description ||= attribute_value(xml, 'channel > description')
|
47
|
+
end
|
48
|
+
|
49
|
+
# Language
|
50
|
+
#
|
51
|
+
# TODO: Migrate multilanguage sites.
|
52
|
+
#
|
53
|
+
# @return [String]
|
54
|
+
def language
|
55
|
+
@language ||= attribute_value(xml, 'channel > language')
|
56
|
+
end
|
57
|
+
|
58
|
+
# Authors with attributes, indexed by author email.
|
59
|
+
#
|
60
|
+
# @return [Hash]
|
61
|
+
def authors
|
62
|
+
@authors ||= xml.css('channel > author').map do |author|
|
63
|
+
{
|
64
|
+
attribute_value(author, 'author_email') => {
|
65
|
+
id: attribute_value(author, 'author_id').to_i,
|
66
|
+
display_name: attribute_value(author, 'author_display_name'),
|
67
|
+
first_name: attribute_value(author, 'author_first_name'),
|
68
|
+
last_name: attribute_value(author, 'author_last_name'),
|
69
|
+
email: attribute_value(author, 'author_email')
|
70
|
+
|
71
|
+
}
|
72
|
+
}
|
73
|
+
end.reduce(&:merge)
|
74
|
+
end
|
75
|
+
|
76
|
+
# Categories with attributes, indexed by slug ("nicename")
|
77
|
+
#
|
78
|
+
# @return [Hash]
|
79
|
+
def categories
|
80
|
+
@categories ||= xml.css('channel > category').map do |category|
|
81
|
+
{
|
82
|
+
attribute_value(category, 'category_nicename') => {
|
83
|
+
id: attribute_value(category, 'term_id').to_i,
|
84
|
+
title: attribute_value(category, 'cat_name'),
|
85
|
+
parent: attribute_value(category, 'category_parent'),
|
86
|
+
slug: attribute_value(category, 'category_nicename')
|
87
|
+
}
|
88
|
+
}
|
89
|
+
end.reduce(&:merge)
|
90
|
+
end
|
91
|
+
|
92
|
+
# Tags with attributes, indexed by slug
|
93
|
+
#
|
94
|
+
# @return [Hash]
|
95
|
+
def tags
|
96
|
+
@tags ||= xml.css('channel > tag').map do |tag|
|
97
|
+
{
|
98
|
+
attribute_value(tag, 'tag_slug') => {
|
99
|
+
id: attribute_value(tag, 'term_id').to_i,
|
100
|
+
title: attribute_value(tag, 'tag_name'),
|
101
|
+
slug: attribute_value(tag, 'tag_slug')
|
102
|
+
}
|
103
|
+
}
|
104
|
+
end.reduce(&:merge)
|
105
|
+
end
|
106
|
+
|
107
|
+
# Posts, indexed by ID
|
108
|
+
#
|
109
|
+
# @return [Hash]
|
110
|
+
def posts
|
111
|
+
@posts ||= items_find_by('post_type', 'post').map do |post|
|
112
|
+
{ attribute_value(post, 'post_id').to_i => Post.new(wordpress: self, item: post) }
|
113
|
+
end.reduce(&:merge)
|
114
|
+
end
|
115
|
+
|
116
|
+
# Pages, indexed by ID
|
117
|
+
#
|
118
|
+
# @return [Hash]
|
119
|
+
def pages
|
120
|
+
@pages ||= items_find_by('post_type', 'page').map do |page|
|
121
|
+
{ attribute_value(page, 'post_id').to_i => Post.new(wordpress: self, item: page) }
|
122
|
+
end.reduce(&:merge)
|
123
|
+
end
|
124
|
+
|
125
|
+
# Attachments, indexed by ID
|
126
|
+
#
|
127
|
+
# @return [Hash]
|
128
|
+
def attachments
|
129
|
+
@attachments ||= items_find_by('post_type', 'attachment').map do |attachment|
|
130
|
+
{ attribute_value(attachment, 'post_id').to_i => Attachment.new(wordpress: self, item: attachment) }
|
131
|
+
end.reduce(&:merge)
|
132
|
+
end
|
133
|
+
|
134
|
+
# Find items by attribute and value
|
135
|
+
#
|
136
|
+
# @param [String] Attribute name
|
137
|
+
# @param [String] Attribute value
|
138
|
+
# @return [Nokogiri::NodeSet]
|
139
|
+
def items_find_by(attribute, value)
|
140
|
+
xml.css('channel > item').select do |item|
|
141
|
+
attribute_value(item, attribute) == value
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
# Get element's attribute value
|
146
|
+
#
|
147
|
+
# @param [Nokogiri::XML::Element]
|
148
|
+
# @param [String]
|
149
|
+
# @return [String]
|
150
|
+
def attribute_value(element, attribute)
|
151
|
+
element.at_css(attribute).text
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sutty-migration
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- f
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-08-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jekyll
|
@@ -108,6 +108,48 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '5.45'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: wordpress-formatting
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 0.1.0
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 0.1.0
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: nokogiri
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '1.11'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '1.11'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: php-serialize
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: 1.3.0
|
146
|
+
type: :runtime
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: 1.3.0
|
111
153
|
- !ruby/object:Gem::Dependency
|
112
154
|
name: pry
|
113
155
|
requirement: !ruby/object:Gem::Requirement
|
@@ -138,6 +180,9 @@ files:
|
|
138
180
|
- lib/sutty_migration/data.rb
|
139
181
|
- lib/sutty_migration/jekyll/document_creator.rb
|
140
182
|
- lib/sutty_migration/wordpress.rb
|
183
|
+
- lib/sutty_migration/wordpress_xml.rb
|
184
|
+
- lib/sutty_migration/wordpress_xml/attachment.rb
|
185
|
+
- lib/sutty_migration/wordpress_xml/post.rb
|
141
186
|
homepage: https://0xacab.org/sutty/jekyll/sutty-migration
|
142
187
|
licenses:
|
143
188
|
- GPL-3.0
|