sutty-migration 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +50 -0
- data/lib/sutty_migration/data.rb +32 -29
- data/lib/sutty_migration/jekyll/document_creator.rb +66 -22
- data/lib/sutty_migration/wordpress.rb +46 -19
- data/lib/sutty_migration/wordpress_xml.rb +154 -0
- data/lib/sutty_migration/wordpress_xml/attachment.rb +69 -0
- data/lib/sutty_migration/wordpress_xml/post.rb +171 -0
- metadata +47 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5486653e0e1eb13f5c4c4f85235c875c782fee7be37a1bee9e4cdd84d5879d0a
|
|
4
|
+
data.tar.gz: '096ab9a992ad5b4cf36bb765a4eb99bd4ac9f2fc35ec25737906eb7c3abc8fdf'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e94245fd5af90a7411b842e13c44a5f85bbbe2544449de98eaa9c52dbb70f095938754bb65dea382f5275df26b6753c37c5cea24c564ff8f98ad6a0f29406e0e
|
|
7
|
+
data.tar.gz: f415da3e9c4ebee1ec8a6101676ae17a319d05ee248c8360d834d7f321190e791eb8274eb6fa0c5fcc74be5c1d2e1b77195894cb9179c3e33626bd090636327b
|
data/README.md
CHANGED
|
@@ -23,6 +23,7 @@ Add the plugin to your `_config.yml`:
|
|
|
23
23
|
```yaml
|
|
24
24
|
plugins:
|
|
25
25
|
- sutty-migration
|
|
26
|
+
array_separator: ','
|
|
26
27
|
```
|
|
27
28
|
|
|
28
29
|
Compile a CSV file with the following required fields:
|
|
@@ -114,6 +115,55 @@ Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
|
|
|
114
115
|
end
|
|
115
116
|
```
|
|
116
117
|
|
|
118
|
+
### WordPress XML
|
|
119
|
+
|
|
120
|
+
If you have the XML dump from a WordPress site, you can migrate content
|
|
121
|
+
by writing a migration plugin.
|
|
122
|
+
|
|
123
|
+
```ruby
|
|
124
|
+
# frozen_string_literal: true
|
|
125
|
+
|
|
126
|
+
require 'sutty_migration/jekyll/document_creator'
|
|
127
|
+
require 'sutty_migration/wordpress_xml'
|
|
128
|
+
require 'jekyll-write-and-commit-changes'
|
|
129
|
+
require 'securerandom'
|
|
130
|
+
|
|
131
|
+
# Run after reading the site
|
|
132
|
+
Jekyll::Hooks.register :site, :post_read do |site|
|
|
133
|
+
# Put the XML dump at _files/wordpress.xml
|
|
134
|
+
xml = SuttyMigration::WordpressXml.new site: site, file: '_files/wordpress.xml'
|
|
135
|
+
|
|
136
|
+
# Download all files
|
|
137
|
+
xml.attachments.values.map(&:download)
|
|
138
|
+
|
|
139
|
+
# Migrate posts. You can move metadata around and recover
|
|
140
|
+
# relationships or any info your theme requires.
|
|
141
|
+
xml.posts.values.each do |post|
|
|
142
|
+
# Update documents already migrated.
|
|
143
|
+
doc = Jekyll::Document.find_or_create(site: site, collection: locale, title: post.title, slug: post.slug, date: post.date)
|
|
144
|
+
# Don't change the UUIDv4
|
|
145
|
+
doc.data['uuid'] ||= SecureRandom.uuid
|
|
146
|
+
doc.data['draft'] = post.draft?
|
|
147
|
+
doc.data['layout'] = 'post'
|
|
148
|
+
doc.data['last_modified_at'] = post.last_modified_at
|
|
149
|
+
|
|
150
|
+
doc.data['categories'] = post.categories.map { |c| c[:title] }
|
|
151
|
+
doc.data['tags'] = post.tags.map { |t| t[:title] }
|
|
152
|
+
|
|
153
|
+
doc.data['author'] = post.author[:email]
|
|
154
|
+
doc.data['description'] = post.description
|
|
155
|
+
doc.content = post.content
|
|
156
|
+
|
|
157
|
+
doc.save
|
|
158
|
+
rescue => e
|
|
159
|
+
Jekyll.logger.warn "Couldn't migrate #{post.title}"
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
exit # Stop here
|
|
163
|
+
end
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
|
|
117
167
|
## Contributing
|
|
118
168
|
|
|
119
169
|
Bug reports and pull requests are welcome on 0xacab.org at
|
data/lib/sutty_migration/data.rb
CHANGED
|
@@ -13,6 +13,8 @@ require_relative 'jekyll/document_creator'
|
|
|
13
13
|
Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
|
|
14
14
|
documents = site.documents
|
|
15
15
|
|
|
16
|
+
array_separator = site.config.fetch('array_separator', ',')
|
|
17
|
+
|
|
16
18
|
site.data['layouts']&.each do |name, layout|
|
|
17
19
|
site.data.dig('migration', name)&.each do |row|
|
|
18
20
|
row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
|
|
@@ -24,41 +26,42 @@ Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
|
|
|
24
26
|
end
|
|
25
27
|
end
|
|
26
28
|
|
|
27
|
-
document ||= Jekyll::Document.create(site: site, collection: 'posts',
|
|
29
|
+
document ||= Jekyll::Document.create(site: site, collection: 'posts',
|
|
30
|
+
**row.slice(*%w[date slug title]).transform_keys(&:to_sym))
|
|
28
31
|
|
|
29
32
|
row.each do |attribute, value|
|
|
30
33
|
next unless value.blank?
|
|
31
34
|
|
|
32
35
|
row[attribute] =
|
|
33
36
|
case layout.dig(attribute, 'type')
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
37
|
+
when 'string' then value
|
|
38
|
+
when 'text' then value
|
|
39
|
+
when 'tel' then value
|
|
40
|
+
# TODO: validate
|
|
41
|
+
when 'color' then value
|
|
42
|
+
when 'date' then Jekyll::Utils.parse_date(value)
|
|
43
|
+
# TODO: validate
|
|
44
|
+
when 'email' then value
|
|
45
|
+
# TODO: validate
|
|
46
|
+
when 'url' then value
|
|
47
|
+
when 'content' then value
|
|
48
|
+
when 'markdown_content' then value
|
|
49
|
+
when 'markdown' then value
|
|
50
|
+
when 'number' then value.to_i
|
|
51
|
+
when 'order' then value.to_i
|
|
52
|
+
when 'boolean' then !value.strip.empty?
|
|
53
|
+
when 'array' then value.split(array_separator).map(&:strip)
|
|
54
|
+
# TODO: process values from the default array
|
|
55
|
+
when 'predefined_array' then value.split(array_separator).map(&:strip)
|
|
56
|
+
when 'image' then { 'path' => value, 'description' => '' }
|
|
57
|
+
when 'file' then { 'path' => value, 'description' => '' }
|
|
58
|
+
when 'geo' then %w[lat lng].zip(value.split(array_separator, 2).map(&:to_f)).to_h
|
|
59
|
+
when 'belongs_to' then value
|
|
60
|
+
when 'has_many' then value.split(array_separator).map(&:strip)
|
|
61
|
+
when 'has_and_belongs_to_many' then value.split(array_separator).map(&:strip)
|
|
62
|
+
when 'related_posts' then value.split(array_separator).map(&:strip)
|
|
63
|
+
when 'locales' then value.split(array_separator).map(&:strip)
|
|
64
|
+
else value
|
|
62
65
|
end
|
|
63
66
|
end
|
|
64
67
|
|
|
@@ -7,31 +7,75 @@ module SuttyMigration
|
|
|
7
7
|
module Jekyll
|
|
8
8
|
module DocumentCreator
|
|
9
9
|
class DocumentExists < ArgumentError; end
|
|
10
|
+
|
|
10
11
|
def self.included(base)
|
|
11
12
|
base.class_eval do
|
|
13
|
+
class << self
|
|
14
|
+
# Creates a new document in a collection or fails if it already
|
|
15
|
+
# exists.
|
|
16
|
+
#
|
|
17
|
+
# @param :site [Jekyll::Site] Jekyll site
|
|
18
|
+
# @param :date [Time] Post date
|
|
19
|
+
# @param :title [String] Post title
|
|
20
|
+
# @param :slug [String] Post slug, slugified title if empty
|
|
21
|
+
# @param :collection [Jekyll::Collection,String] Collection label or collection
|
|
22
|
+
# @return [Jekyll::Document] A new document
|
|
23
|
+
def create(site:, date:, title:, collection:, slug: nil)
  # A String is taken as a collection label and resolved through the site.
  target = collection.is_a?(String) ? site.collections[collection] : collection
  # Without a slug, derive one from the title.
  name = slug.blank? ? ::Jekyll::Utils.slugify(title, mode: 'latin') : slug
  basename = "#{date.strftime('%F')}-#{name}.markdown"
  destination = File.join(target.directory, basename)

  # Never overwrite a document that is already on disk.
  raise DocumentExists, "#{destination} already exists" if File.exist?(destination)

  document = ::Jekyll::Document.new(destination, site: site, collection: target)
  # Register the document in its collection and give it its title.
  target.docs << document
  document.data['title'] = title
  document
end
|
|
36
|
+
|
|
37
|
+
# Finds a document by its relative path or creates it if it
|
|
38
|
+
# doesn't exist. Helpful for idempotent migrations (create or
|
|
39
|
+
# update actions)
|
|
40
|
+
#
|
|
41
|
+
# @param :site [Jekyll::Site] Jekyll site
|
|
42
|
+
# @param :date [Time] Post date
|
|
43
|
+
# @param :title [String] Post title
|
|
44
|
+
# @param :slug [String] Post slug, slugified title if empty
|
|
45
|
+
# @param :collection [Jekyll::Collection,String] Collection label or collection
|
|
46
|
+
# @return [Jekyll::Document] The found document or a new one
|
|
47
|
+
def find_or_create(site:, date:, title:, collection:, slug: nil)
  # A String is taken as a collection label and resolved through the site.
  collection = site.collections[collection] if collection.is_a? String
  slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
  basename = "#{date.strftime('%F')}-#{slug}.markdown"

  # The relative path is the key used to look the document up.
  relative_path = File.join(collection.relative_directory, basename)
  # Existence is checked on the same path .create writes to. A relative
  # path would be resolved against the process working directory, so an
  # existing document could be missed and .create would then raise
  # DocumentExists.
  absolute_path = File.join(collection.directory, basename)

  return find(site: site, relative_path: relative_path) if File.exist?(absolute_path)

  create(site: site, date: date, title: title, slug: slug, collection: collection)
end
|
|
57
|
+
|
|
58
|
+
# Finds a document by its relative path
|
|
59
|
+
#
|
|
60
|
+
# @param :site [Jekyll::Site]
|
|
61
|
+
# @param :relative_path [String]
|
|
62
|
+
# @return [Jekyll::Document,Nil]
|
|
63
|
+
def find(site:, relative_path:)
  # Look the document up in the per-path index; nil when it isn't there.
  index = indexed_documents_by_relative_path(site)
  index[relative_path]
end
|
|
66
|
+
|
|
67
|
+
# Index documents by relative path for faster finding
|
|
68
|
+
#
|
|
69
|
+
# @param [Jekyll::Site]
|
|
70
|
+
# @return [Hash]
|
|
71
|
+
def indexed_documents_by_relative_path(site)
  # Built on first use and then reused as-is: later calls return the same
  # Hash regardless of the site passed in.
  @indexed_documents_by_relative_path ||= site.documents.each_with_object({}) do |document, index|
    index[document.relative_path] = document
  end
end
|
|
78
|
+
end
|
|
35
79
|
end
|
|
36
80
|
end
|
|
37
81
|
end
|
|
@@ -43,7 +43,9 @@ module SuttyMigration
|
|
|
43
43
|
pfx = "#{prefix}#{blog[:blog_id]}_" if blog[:blog_id] > 1
|
|
44
44
|
pfx ||= prefix
|
|
45
45
|
|
|
46
|
-
[
|
|
46
|
+
[blog[:blog_id],
|
|
47
|
+
blog.merge(db: self.class.new(site: site, url: url, prefix: pfx, database: database, limit: limit,
|
|
48
|
+
multisite: self))]
|
|
47
49
|
end.to_h
|
|
48
50
|
end
|
|
49
51
|
|
|
@@ -94,11 +96,11 @@ module SuttyMigration
|
|
|
94
96
|
end
|
|
95
97
|
|
|
96
98
|
Faraday.get(url) do |req|
|
|
97
|
-
req.options.on_data =
|
|
99
|
+
req.options.on_data = proc do |chunk, downloaded_bytes|
|
|
98
100
|
f.write chunk
|
|
99
101
|
|
|
100
102
|
if progress
|
|
101
|
-
progress.progress =
|
|
103
|
+
progress.progress = downloaded_bytes > content_length ? content_length : downloaded_bytes
|
|
102
104
|
end
|
|
103
105
|
end
|
|
104
106
|
end
|
|
@@ -130,10 +132,31 @@ module SuttyMigration
|
|
|
130
132
|
p.map do |post|
|
|
131
133
|
# Sequel parses dates on localtime
|
|
132
134
|
post[:date] = ::Jekyll::Utils.parse_date(post[:date]) unless post[:date].blank?
|
|
133
|
-
|
|
135
|
+
unless post[:last_modified_at].blank?
|
|
136
|
+
post[:last_modified_at] =
|
|
137
|
+
::Jekyll::Utils.parse_date(post[:last_modified_at])
|
|
138
|
+
end
|
|
134
139
|
|
|
135
|
-
post[:front_matter] =
|
|
136
|
-
|
|
140
|
+
post[:front_matter] =
|
|
141
|
+
begin
|
|
142
|
+
unless post[:front_matter].blank?
|
|
143
|
+
JSON.parse(post[:front_matter]).transform_keys(&:to_sym).transform_values do |v|
|
|
144
|
+
v.size == 1 ? v.first : v
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
rescue JSON::ParserError
|
|
148
|
+
{}
|
|
149
|
+
end
|
|
150
|
+
post[:terms] =
|
|
151
|
+
begin
|
|
152
|
+
unless post[:terms].blank?
|
|
153
|
+
JSON.parse(post[:terms]).transform_keys(&:to_sym).transform_values do |v|
|
|
154
|
+
v.size == 1 ? v.first : v
|
|
155
|
+
end
|
|
156
|
+
end
|
|
157
|
+
rescue JSON::ParserError
|
|
158
|
+
{}
|
|
159
|
+
end
|
|
137
160
|
end
|
|
138
161
|
end
|
|
139
162
|
end
|
|
@@ -167,7 +190,7 @@ module SuttyMigration
|
|
|
167
190
|
<<~EOQ
|
|
168
191
|
select
|
|
169
192
|
u.*
|
|
170
|
-
#{
|
|
193
|
+
#{', json_group_object(m.meta_key, m.meta_value) as meta' if with_meta}
|
|
171
194
|
from #{pfx}users as u
|
|
172
195
|
#{"left join #{pfx}usermeta as m on m.user_id = u.id" if with_meta}
|
|
173
196
|
group by u.id
|
|
@@ -203,11 +226,11 @@ module SuttyMigration
|
|
|
203
226
|
p.menu_order as menu_order,
|
|
204
227
|
p.post_mime_type as mime_type,
|
|
205
228
|
p.comment_count as comment_count
|
|
206
|
-
#{
|
|
207
|
-
#{
|
|
229
|
+
#{', f.front_matter as front_matter' if with_meta}
|
|
230
|
+
#{', t.terms as terms' if with_meta}
|
|
208
231
|
from #{prefix}posts as p
|
|
209
232
|
#{"left join (#{meta_query}) as f on f.post_id = p.ID" if with_meta}
|
|
210
|
-
#{"left join (#{terms_query
|
|
233
|
+
#{"left join (#{terms_query}) as t on t.post_id = p.ID" if with_meta}
|
|
211
234
|
#{"where p.post_type = '#{layout}'" if layout}
|
|
212
235
|
group by p.ID
|
|
213
236
|
EOQ
|
|
@@ -238,17 +261,21 @@ module SuttyMigration
|
|
|
238
261
|
#
|
|
239
262
|
# @param :layout [String] Layout name
|
|
240
263
|
# @return [String]
|
|
241
|
-
def terms_query
|
|
264
|
+
def terms_query
  # Two-step aggregation. The inner query collects term names into one
  # JSON array per (post, taxonomy) pair; the outer query folds those
  # arrays into a single JSON object per post, keyed by taxonomy.
  #
  # The inner query must group by the taxonomy as well as the post.
  # Grouping by post alone yields one row per post, with every term
  # listed under a single arbitrarily chosen taxonomy.
  <<~EOQ
    select
      post_id,
      json_group_object(taxonomy, json(terms)) as terms
    from (
      select
        r.object_id as post_id,
        tt.taxonomy,
        json_group_array(t.name) as terms
      from #{prefix}term_relationships as r
      left join #{prefix}term_taxonomy as tt on tt.term_taxonomy_id = r.term_taxonomy_id
      left join #{prefix}terms as t on t.term_id = tt.term_id
      group by r.object_id, tt.taxonomy)
    group by post_id
  EOQ
end
|
|
254
281
|
end
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'nokogiri'
|
|
4
|
+
require_relative 'wordpress_xml/post'
|
|
5
|
+
require_relative 'wordpress_xml/attachment'
|
|
6
|
+
|
|
7
|
+
module SuttyMigration
  # Understands the XML dump generated by Wordpress and creates
  # Jekyll::Documents
  class WordpressXml
    attr_reader :site, :file, :xml

    # @param :site [Jekyll::Site] Jekyll site
    # @param :file [String] File path
    def initialize(site:, file:)
      @site = site
      @file = file
      @xml = Nokogiri::XML File.read(file)

      # Make things easier by removing namespaces.
      xml.remove_namespaces!
    end

    # Fixed, short representation instead of Ruby's default dump of
    # every instance variable.
    #
    # @return [String]
    def inspect
      '#<SuttyMigration::WordpressXml>'
    end

    # Site URL
    #
    # @return [String]
    def url
      @url ||= attribute_value(xml, 'channel > link')
    end

    # Site title
    #
    # @return [String]
    def title
      @title ||= attribute_value(xml, 'channel > title')
    end

    # Description
    #
    # @return [String]
    def description
      @description ||= attribute_value(xml, 'channel > description')
    end

    # Language
    #
    # TODO: Migrate multilanguage sites.
    #
    # @return [String]
    def language
      @language ||= attribute_value(xml, 'channel > language')
    end

    # Authors with attributes, indexed by author email. An empty Hash
    # when the dump has no authors.
    #
    # Each author also carries its :login (nil when the dump doesn't
    # provide one), because items reference their author by login.
    #
    # @return [Hash]
    def authors
      @authors ||= xml.css('channel > author').each_with_object({}) do |author, index|
        email = attribute_value(author, 'author_email')

        index[email] = {
          id: attribute_value(author, 'author_id').to_i,
          login: author.at_css('author_login')&.text,
          display_name: attribute_value(author, 'author_display_name'),
          first_name: attribute_value(author, 'author_first_name'),
          last_name: attribute_value(author, 'author_last_name'),
          email: email
        }
      end
    end

    # Categories with attributes, indexed by slug ("nicename"). An
    # empty Hash when the dump has no categories.
    #
    # @return [Hash]
    def categories
      @categories ||= xml.css('channel > category').each_with_object({}) do |category, index|
        slug = attribute_value(category, 'category_nicename')

        index[slug] = {
          id: attribute_value(category, 'term_id').to_i,
          title: attribute_value(category, 'cat_name'),
          parent: attribute_value(category, 'category_parent'),
          slug: slug
        }
      end
    end

    # Tags with attributes, indexed by slug. An empty Hash when the
    # dump has no tags.
    #
    # @return [Hash]
    def tags
      @tags ||= xml.css('channel > tag').each_with_object({}) do |tag, index|
        slug = attribute_value(tag, 'tag_slug')

        index[slug] = {
          id: attribute_value(tag, 'term_id').to_i,
          title: attribute_value(tag, 'tag_name'),
          slug: slug
        }
      end
    end

    # Posts, indexed by ID. An empty Hash when there are none.
    #
    # @return [Hash]
    def posts
      @posts ||= items_find_by('post_type', 'post').each_with_object({}) do |post, index|
        index[attribute_value(post, 'post_id').to_i] = Post.new(wordpress: self, item: post)
      end
    end

    # Pages, indexed by ID. An empty Hash when there are none.
    #
    # @return [Hash]
    def pages
      @pages ||= items_find_by('post_type', 'page').each_with_object({}) do |page, index|
        index[attribute_value(page, 'post_id').to_i] = Post.new(wordpress: self, item: page)
      end
    end

    # Attachments, indexed by ID. An empty Hash when there are none.
    #
    # @return [Hash]
    def attachments
      @attachments ||= items_find_by('post_type', 'attachment').each_with_object({}) do |attachment, index|
        index[attribute_value(attachment, 'post_id').to_i] = Attachment.new(wordpress: self, item: attachment)
      end
    end

    # Find items by attribute and value
    #
    # @param [String] Attribute name
    # @param [String] Attribute value
    # @return [Array] Matching item elements
    def items_find_by(attribute, value)
      xml.css('channel > item').select do |item|
        attribute_value(item, attribute) == value
      end
    end

    # Get the text of the first child element matching a selector.
    #
    # @param [Nokogiri::XML::Element]
    # @param [String]
    # @return [String]
    # @raise [NoMethodError] when nothing matches the selector
    def attribute_value(element, attribute)
      element.at_css(attribute).text
    end
  end
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'post'
|
|
4
|
+
require 'php-serialize'
|
|
5
|
+
require 'faraday'
|
|
6
|
+
require 'progressbar'
|
|
7
|
+
|
|
8
|
+
module SuttyMigration
  class WordpressXml
    # Represents an attachment or uploaded file.
    class Attachment < Post
      # File URL
      #
      # @return [String]
      def attachment_url
        @attachment_url ||= attribute_value 'attachment_url'
      end

      # File destination: the path component of the file URL without
      # its leading slash, so it is relative to the working directory.
      #
      # NOTE(review): the path comes straight from the XML dump and is
      # not sanitized. Confirm that dumps are trusted, or reject paths
      # containing "..".
      #
      # @return [String]
      def dest
        @dest ||= URI(attachment_url).path.sub(%r{\A/}, '')
      end

      # Metadata, with file information as a Hash
      #
      # The parent memoizes the Hash and returns the same object every
      # time, so the serialized value is decoded once only. Decoding
      # again would pass the already-decoded value to PHP.unserialize.
      #
      # @return [Hash]
      def meta
        super.tap do |m|
          next if @attachment_metadata_decoded

          raw = m['_wp_attachment_metadata']
          m['_wp_attachment_metadata'] = PHP.unserialize(raw) if raw.is_a?(String) && !raw.empty?
          @attachment_metadata_decoded = true
        end
      end

      # Download the file if it doesn't exist. Optionally show a
      # progress bar.
      #
      # A download that fails, either with an exception or with a
      # non-successful HTTP status, removes the partial file so the
      # next run tries again instead of treating it as downloaded.
      #
      # @param :progress [Boolean]
      # @return [Boolean] Whether the file exists after the call
      def download(progress: true)
        return true if File.exist? dest

        ::Jekyll.logger.info "Downloading #{dest}"

        FileUtils.mkdir_p File.dirname(dest)

        bar = nil
        content_length = 0

        if progress
          # Ask for the size first so the bar has a total.
          head = Faraday.head(attachment_url)
          content_length = head.headers['content-length'].to_i
          bar = ProgressBar.create(title: File.basename(dest), total: content_length, output: $stderr)
        end

        # Binary mode: attachments are arbitrary files, not text.
        response = File.open(dest, 'wb') do |f|
          Faraday.get(attachment_url) do |req|
            req.options.on_data = proc do |chunk, downloaded_bytes|
              f.write chunk

              # Never move the bar past its total.
              bar.progress = [downloaded_bytes, content_length].min if bar
            end
          end
        end

        # The streamed body of an error response isn't the file.
        FileUtils.rm_f(dest) unless response.success?

        File.exist? dest
      rescue StandardError
        FileUtils.rm_f(dest)
        raise
      end
    end
  end
end
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'wordpress_formatting/wpautop'
|
|
4
|
+
require 'jekyll/utils'
|
|
5
|
+
|
|
6
|
+
module SuttyMigration
  class WordpressXml
    # Represents a WordPress post
    class Post
      attr_reader :wordpress, :item

      # @param :wordpress [SuttyMigration::WordpressXml]
      # @param :item [Nokogiri::XML::Element]
      def initialize(wordpress:, item:)
        @wordpress = wordpress
        @item = item
      end

      # Short representation showing only the title.
      #
      # @return [String]
      def inspect
        "#<SuttyMigration::WordpressXml::Post title=\"#{title}\">"
      end

      # Post ID
      #
      # @return [Integer]
      def id
        @id ||= attribute_value('post_id').to_i
      end

      # Permalink: the post link with the site URL removed.
      #
      # @return [String]
      def permalink
        @permalink ||= attribute_value('link').sub(wordpress.url, '')
      end

      # Title
      #
      # @return [String]
      def title
        @title ||= attribute_value('title')
      end

      # Description
      #
      # @return [String]
      def description
        @description ||= attribute_value('description')
      end

      # Slug ("post name")
      #
      # @return [String]
      def slug
        @slug ||= attribute_value('post_name')
      end

      # Publication date.
      #
      # WordPress can store this date in three different fields and
      # sometimes they come empty or invalid. The first field that
      # parses is used.
      #
      # @return [Time,nil] nil when no field holds a valid date
      def date
        @date ||= %w[pubDate post_date_gmt post_date].map do |date_attr|
          ::Jekyll::Utils.parse_date attribute_value(date_attr)
        rescue StandardError
          # Missing or invalid field, try the next one.
          nil
        end.compact.first
      end

      # Modification date.
      #
      # @return [Time]
      def last_modified_at
        @last_modified_at ||= ::Jekyll::Utils.parse_date attribute_value('post_modified_gmt')
      end

      # Content as HTML, with site URL removed from href and src
      # attributes.
      #
      # @return [String]
      def content
        # The URL is escaped so it's matched literally.
        @content ||= WordpressFormatting::Wpautop.wpautop(attribute_value('encoded')).gsub(
          / (href|src)="#{Regexp.escape(wordpress.url)}/, ' \\1="'
        )
      end

      # Author attributes.
      #
      # The item names its author in the "creator" element, which
      # WordPress fills with the author's login, while authors are
      # indexed by email. The creator is tried as a key first and
      # then resolved through the login.
      #
      # @return [Hash,nil] nil when the author can't be found
      def author
        @author ||=
          begin
            creator = attribute_value('creator')
            authors = wordpress.authors || {}

            authors[creator] || author_by_login(authors, creator)
          end
      end

      # Post password. Use with jekyll-crypto.
      #
      # @return [String]
      def password
        @password ||= attribute_value 'post_password'
      end

      # Tags with attributes. Tags the site doesn't define are
      # skipped.
      #
      # @return [Array<Hash>]
      def tags
        @tags ||= terms_for('post_tag', wordpress.tags)
      end

      # Categories with attributes. Categories the site doesn't
      # define are skipped.
      #
      # @return [Array<Hash>]
      def categories
        @categories ||= terms_for('category', wordpress.categories)
      end

      # Metadata. Plugins store useful information here. Duplicated
      # keys are returned as an Array of values.
      #
      # @return [Hash]
      def meta
        @meta ||= {}.tap do |meta|
          item.css('postmeta').each do |m|
            key = m.css('meta_key').text
            value = m.css('meta_value').text

            case meta[key]
            when nil then meta[key] = value
            # Second value for the key, start collecting them.
            when String then meta[key] = [meta[key], value]
            when Array then meta[key] << value
            end
          end
        end
      end

      # Order. Higher are sorted on top by jekyll-order.
      #
      # @return [Integer]
      def order
        @order ||= attribute_value('is_sticky').to_i
      end

      # Publication status
      #
      # @return [Boolean]
      def published?
        # Not memoized: ||= can't cache false.
        attribute_value('status') == 'publish'
      end

      # Publication status
      #
      # @return [Boolean]
      def draft?
        # Not memoized: ||= can't cache false.
        attribute_value('status') == 'draft'
      end

      # Get the text of the first element matching the key.
      #
      # @return [String]
      # @raise [NoMethodError] when the item has no such element
      def attribute_value(key)
        item.at_css(key).text
      end

      private

      # Finds the author whose login is the given one by reading the
      # author elements of the dump.
      #
      # @param authors [Hash] Authors indexed by email
      # @param login [String]
      # @return [Hash,nil]
      def author_by_login(authors, login)
        node = wordpress.xml.css('channel > author').find do |a|
          a.at_css('author_login')&.text == login
        end
        email = node&.at_css('author_email')&.text

        authors[email] if email
      end

      # Attributes of the item's terms for a taxonomy.
      #
      # @param domain [String] Value of the "domain" attribute
      # @param index [Hash,nil] Term attributes indexed by slug
      # @return [Array<Hash>]
      def terms_for(domain, index)
        index ||= {}

        item.css('category').select do |c|
          c[:domain] == domain
        end.map do |c|
          index[c[:nicename]]
        end.compact
      end
    end
  end
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: sutty-migration
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- f
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-
|
|
11
|
+
date: 2021-08-10 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: jekyll
|
|
@@ -108,6 +108,48 @@ dependencies:
|
|
|
108
108
|
- - "~>"
|
|
109
109
|
- !ruby/object:Gem::Version
|
|
110
110
|
version: '5.45'
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
112
|
+
name: wordpress-formatting
|
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - "~>"
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: 0.1.0
|
|
118
|
+
type: :runtime
|
|
119
|
+
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - "~>"
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: 0.1.0
|
|
125
|
+
- !ruby/object:Gem::Dependency
|
|
126
|
+
name: nokogiri
|
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
|
128
|
+
requirements:
|
|
129
|
+
- - "~>"
|
|
130
|
+
- !ruby/object:Gem::Version
|
|
131
|
+
version: 1.12.0
|
|
132
|
+
type: :runtime
|
|
133
|
+
prerelease: false
|
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
135
|
+
requirements:
|
|
136
|
+
- - "~>"
|
|
137
|
+
- !ruby/object:Gem::Version
|
|
138
|
+
version: 1.12.0
|
|
139
|
+
- !ruby/object:Gem::Dependency
|
|
140
|
+
name: php-serialize
|
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
|
142
|
+
requirements:
|
|
143
|
+
- - "~>"
|
|
144
|
+
- !ruby/object:Gem::Version
|
|
145
|
+
version: 1.3.0
|
|
146
|
+
type: :runtime
|
|
147
|
+
prerelease: false
|
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
149
|
+
requirements:
|
|
150
|
+
- - "~>"
|
|
151
|
+
- !ruby/object:Gem::Version
|
|
152
|
+
version: 1.3.0
|
|
111
153
|
- !ruby/object:Gem::Dependency
|
|
112
154
|
name: pry
|
|
113
155
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -138,6 +180,9 @@ files:
|
|
|
138
180
|
- lib/sutty_migration/data.rb
|
|
139
181
|
- lib/sutty_migration/jekyll/document_creator.rb
|
|
140
182
|
- lib/sutty_migration/wordpress.rb
|
|
183
|
+
- lib/sutty_migration/wordpress_xml.rb
|
|
184
|
+
- lib/sutty_migration/wordpress_xml/attachment.rb
|
|
185
|
+
- lib/sutty_migration/wordpress_xml/post.rb
|
|
141
186
|
homepage: https://0xacab.org/sutty/jekyll/sutty-migration
|
|
142
187
|
licenses:
|
|
143
188
|
- GPL-3.0
|