sutty-migration 0.1.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +98 -0
- data/lib/sutty-migration.rb +1 -69
- data/lib/sutty_migration/core_extensions.rb +26 -0
- data/lib/sutty_migration/data.rb +80 -0
- data/lib/sutty_migration/jekyll/document_creator.rb +85 -0
- data/lib/sutty_migration/wordpress.rb +282 -0
- data/lib/sutty_migration/wordpress_xml.rb +154 -0
- data/lib/sutty_migration/wordpress_xml/attachment.rb +69 -0
- data/lib/sutty_migration/wordpress_xml/post.rb +171 -0
- metadata +121 -3
- data/lib/wordpress.rb +0 -174
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5486653e0e1eb13f5c4c4f85235c875c782fee7be37a1bee9e4cdd84d5879d0a
|
4
|
+
data.tar.gz: '096ab9a992ad5b4cf36bb765a4eb99bd4ac9f2fc35ec25737906eb7c3abc8fdf'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e94245fd5af90a7411b842e13c44a5f85bbbe2544449de98eaa9c52dbb70f095938754bb65dea382f5275df26b6753c37c5cea24c564ff8f98ad6a0f29406e0e
|
7
|
+
data.tar.gz: f415da3e9c4ebee1ec8a6101676ae17a319d05ee248c8360d834d7f321190e791eb8274eb6fa0c5fcc74be5c1d2e1b77195894cb9179c3e33626bd090636327b
|
data/README.md
CHANGED
@@ -23,6 +23,7 @@ Add the plugin to your `_config.yml`:
|
|
23
23
|
```yaml
|
24
24
|
plugins:
|
25
25
|
- sutty-migration
|
26
|
+
array_separator: ','
|
26
27
|
```
|
27
28
|
|
28
29
|
Compile a CSV file with the following required fields:
|
@@ -66,6 +67,103 @@ To start migration just build your site:
|
|
66
67
|
bundle exec jekyll build
|
67
68
|
```
|
68
69
|
|
70
|
+
**Tip:** Files can also be JSON, TSV and YAML, since they're all
|
71
|
+
supported by Jekyll.
|
72
|
+
|
73
|
+
### WordPress
|
74
|
+
|
75
|
+
Instead of requiring you to install and configure MariaDB/MySQL, you can
|
76
|
+
convert the database into SQLite3 like this:
|
77
|
+
|
78
|
+
```bash
|
79
|
+
git clone https://0xacab.org/sutty/mysql2sqlite.git
|
80
|
+
cd mysql2sqlite
|
81
|
+
./mysql2sqlite /path/to/database/dump.sql |
|
82
|
+
sed -re "s/, 0x([0-9a-f]+),/, X'\1',/i" |
|
83
|
+
sqlite3 wordpress.sqlite3
|
84
|
+
```
|
85
|
+
|
86
|
+
It will probably show some errors.
|
87
|
+
|
88
|
+
Note the `sed` command is required to convert hexadecimal values into
|
89
|
+
SQLite syntax, since `mysql2sqlite` doesn't support this yet.
|
90
|
+
|
91
|
+
WordPress websites can include lots of posts and metadata, depending on
|
92
|
+
the number of plugins installed. We don't have an official way of
|
93
|
+
dumping everything into Jekyll, because you will probably want to move
|
94
|
+
things around. You can write a plugin like this:
|
95
|
+
|
96
|
+
```ruby
|
97
|
+
# _plugins/wordpress.rb
|
98
|
+
# frozen_string_literal: true
|
99
|
+
|
100
|
+
require 'sutty_migration/wordpress'
|
101
|
+
require 'sutty_migration/jekyll/document_creator'
|
102
|
+
require 'jekyll-write-and-commit-changes'
|
103
|
+
|
104
|
+
Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
|
105
|
+
wp = SuttyMigration::Wordpress.new(site: site, database: 'wordpress.sqlite3', prefix: 'wp_', url: 'https://wordpre.ss')
|
106
|
+
|
107
|
+
# Download all files
|
108
|
+
wp.download_all
|
109
|
+
|
110
|
+
wp.posts(layout: 'post').each do |post|
|
111
|
+
doc = Jekyll::Document.create(site: site, title: post[:title], date: post[:date], collection: 'posts')
|
112
|
+
doc.content = post[:content]
|
113
|
+
doc.save
|
114
|
+
end
|
115
|
+
end
|
116
|
+
```
|
117
|
+
|
118
|
+
### WordPress XML
|
119
|
+
|
120
|
+
If you have the XML dump from a WordPress site, you can migrate content
|
121
|
+
by writing a migration plugin.
|
122
|
+
|
123
|
+
```ruby
|
124
|
+
# frozen_string_literal: true
|
125
|
+
|
126
|
+
require 'sutty_migration/jekyll/document_creator'
|
127
|
+
require 'sutty_migration/wordpress_xml'
|
128
|
+
require 'jekyll-write-and-commit-changes'
|
129
|
+
require 'securerandom'
|
130
|
+
|
131
|
+
# Run after reading the site
|
132
|
+
Jekyll::Hooks.register :site, :post_read do |site|
|
133
|
+
# Put the XML dump at _files/wordpress.xml
|
134
|
+
xml = SuttyMigration::WordpressXml.new site: site, file: '_files/wordpress.xml'
|
135
|
+
|
136
|
+
# Download all files
|
137
|
+
xml.attachments.values.map(&:download)
|
138
|
+
|
139
|
+
# Migrate posts. You can move metadata around and recover
|
140
|
+
# relationships or any info your theme requires.
|
141
|
+
xml.posts.values.each do |post|
|
142
|
+
# Update documents already migrated.
|
143
|
+
doc = Jekyll::Document.find_or_create(site: site, collection: locale, title: post.title, slug: post.slug, date: post.date)
|
144
|
+
# Don't change the UUIDv4
|
145
|
+
doc.data['uuid'] ||= SecureRandom.uuid
|
146
|
+
doc.data['draft'] = post.draft?
|
147
|
+
doc.data['layout'] = 'post'
|
148
|
+
doc.data['last_modified_at'] = post.last_modified_at
|
149
|
+
|
150
|
+
doc.data['categories'] = post.categories.map { |c| c[:title] }
|
151
|
+
doc.data['tags'] = post.tags.map { |t| t[:title] }
|
152
|
+
|
153
|
+
doc.data['author'] = post.author[:email]
|
154
|
+
doc.data['description'] = post.description
|
155
|
+
doc.content = post.content
|
156
|
+
|
157
|
+
doc.save
|
158
|
+
rescue => e
|
159
|
+
Jekyll.logger.warn "Couldn't migrate #{post.title}"
|
160
|
+
end
|
161
|
+
|
162
|
+
exit # Stop here
|
163
|
+
end
|
164
|
+
```
|
165
|
+
|
166
|
+
|
69
167
|
## Contributing
|
70
168
|
|
71
169
|
Bug reports and pull requests are welcome on 0xacab.org at
|
data/lib/sutty-migration.rb
CHANGED
@@ -1,71 +1,3 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
require 'fast_blank'
|
5
|
-
require 'jekyll-write-and-commit-changes'
|
6
|
-
|
7
|
-
Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
|
8
|
-
documents = site.documents
|
9
|
-
|
10
|
-
site.data['layouts']&.each do |name, layout|
|
11
|
-
site.data.dig('migration', name)&.each do |row|
|
12
|
-
row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
|
13
|
-
|
14
|
-
if row['id']
|
15
|
-
document = documents.find do |doc|
|
16
|
-
doc.data['id'] == row['id']
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
document ||=
|
21
|
-
begin
|
22
|
-
base = "#{row['date'] || Date.today.to_s}-#{Jekyll::Utils.slugify(row['title'], mode: 'latin')}.markdown"
|
23
|
-
path = File.join(site.source, '_posts', base)
|
24
|
-
|
25
|
-
raise ArgumentError, "Row #{row['id']} duplicates file #{base}" if File.exist? path
|
26
|
-
|
27
|
-
doc = Jekyll::Document.new(path, site: site, collection: site.collections['posts'])
|
28
|
-
site.collections['posts'] << doc
|
29
|
-
|
30
|
-
doc
|
31
|
-
end
|
32
|
-
|
33
|
-
row.each do |attribute, value|
|
34
|
-
row[attribute] =
|
35
|
-
case layout.dig(attribute, 'type')
|
36
|
-
when 'string' then value
|
37
|
-
when 'text' then value
|
38
|
-
when 'tel' then value
|
39
|
-
when 'color' then value # TODO: validar
|
40
|
-
when 'date' then Jekyll::Utils.parse_date(value)
|
41
|
-
when 'email' then value # TODO: validar
|
42
|
-
when 'url' then value # TODO: validar
|
43
|
-
when 'content' then value
|
44
|
-
when 'markdown_content' then value
|
45
|
-
when 'markdown' then value
|
46
|
-
when 'number' then value.to_i
|
47
|
-
when 'order' then value.to_i
|
48
|
-
when 'boolean' then !value.strip.empty?
|
49
|
-
when 'array' then value.split(',').map(&:strip)
|
50
|
-
# TODO: procesar los valores en base a los valores predefinidos
|
51
|
-
when 'predefined_array' then value.split(',').map(&:strip)
|
52
|
-
when 'image' then { 'path' => value, 'description' => '' }
|
53
|
-
when 'file' then { 'path' => value, 'description' => '' }
|
54
|
-
when 'geo' then %w[lat lng].zip(value.split(',', 2).map(&:to_f)).to_h
|
55
|
-
when 'belongs_to' then value
|
56
|
-
when 'has_many' then value.split(',').map(&:strip)
|
57
|
-
when 'has_and_belongs_to_many' then value.split(',').map(&:strip)
|
58
|
-
when 'related_posts' then value.split(',').map(&:strip)
|
59
|
-
when 'locales' then value.split(',').map(&:strip)
|
60
|
-
else value
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
document.data['uuid'] ||= SecureRandom.uuid
|
65
|
-
document.content = row.delete('content')
|
66
|
-
|
67
|
-
document.data.merge! row
|
68
|
-
document.save
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
3
|
+
require_relative 'sutty_migration/data'
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Extend String so we can check whether it is blank
|
4
|
+
require 'fast_blank'
|
5
|
+
|
6
|
+
# Nil values are always blank
|
7
|
+
class NilClass
|
8
|
+
def blank?
|
9
|
+
true
|
10
|
+
end
|
11
|
+
|
12
|
+
def present?
|
13
|
+
false
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
# Time values are never blank
|
18
|
+
class Time
|
19
|
+
def blank?
|
20
|
+
false
|
21
|
+
end
|
22
|
+
|
23
|
+
def present?
|
24
|
+
true
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'securerandom'
|
4
|
+
require_relative 'core_extensions'
|
5
|
+
require_relative 'jekyll/document_creator'
|
6
|
+
|
7
|
+
# Registers a plugin for converting CSV files into posts following
|
8
|
+
# Sutty's layout definition.
|
9
|
+
#
|
10
|
+
# If jekyll-write-and-commit-changes is enabled, documents will be saved
|
11
|
+
# on disk and committed if the build command is run with
|
12
|
+
# JEKYLL_ENV=production
|
13
|
+
Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
|
14
|
+
documents = site.documents
|
15
|
+
|
16
|
+
array_separator = site.config.fetch('array_separator', ',')
|
17
|
+
|
18
|
+
site.data['layouts']&.each do |name, layout|
|
19
|
+
site.data.dig('migration', name)&.each do |row|
|
20
|
+
row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
|
21
|
+
row['date'] ||= Time.now
|
22
|
+
|
23
|
+
unless row['id'].blank?
|
24
|
+
document = documents.find do |doc|
|
25
|
+
doc.data['id'] == row['id']
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
document ||= Jekyll::Document.create(site: site, collection: 'posts',
|
30
|
+
**row.slice(*%w[date slug title]).transform_keys(&:to_sym))
|
31
|
+
|
32
|
+
row.each do |attribute, value|
|
33
|
+
next unless value.blank?
|
34
|
+
|
35
|
+
row[attribute] =
|
36
|
+
case layout.dig(attribute, 'type')
|
37
|
+
when 'string' then value
|
38
|
+
when 'text' then value
|
39
|
+
when 'tel' then value
|
40
|
+
# TODO: validate
|
41
|
+
when 'color' then value
|
42
|
+
when 'date' then Jekyll::Utils.parse_date(value)
|
43
|
+
# TODO: validate
|
44
|
+
when 'email' then value
|
45
|
+
# TODO: validate
|
46
|
+
when 'url' then value
|
47
|
+
when 'content' then value
|
48
|
+
when 'markdown_content' then value
|
49
|
+
when 'markdown' then value
|
50
|
+
when 'number' then value.to_i
|
51
|
+
when 'order' then value.to_i
|
52
|
+
when 'boolean' then !value.strip.empty?
|
53
|
+
when 'array' then value.split(array_separator).map(&:strip)
|
54
|
+
# TODO: process values based on the predefined values
|
55
|
+
when 'predefined_array' then value.split(array_separator).map(&:strip)
|
56
|
+
when 'image' then { 'path' => value, 'description' => '' }
|
57
|
+
when 'file' then { 'path' => value, 'description' => '' }
|
58
|
+
when 'geo' then %w[lat lng].zip(value.split(array_separator, 2).map(&:to_f)).to_h
|
59
|
+
when 'belongs_to' then value
|
60
|
+
when 'has_many' then value.split(array_separator).map(&:strip)
|
61
|
+
when 'has_and_belongs_to_many' then value.split(array_separator).map(&:strip)
|
62
|
+
when 'related_posts' then value.split(array_separator).map(&:strip)
|
63
|
+
when 'locales' then value.split(array_separator).map(&:strip)
|
64
|
+
else value
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
document.data['uuid'] ||= SecureRandom.uuid
|
69
|
+
document.content = row.delete('content')
|
70
|
+
|
71
|
+
document.data.merge! row
|
72
|
+
document.save if document.respond_to? :save
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
next unless site.respond_to?(:repository)
|
77
|
+
next unless ENV['JEKYLL_ENV'] == 'production'
|
78
|
+
|
79
|
+
site.repository.commit 'CSV Migration'
|
80
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'jekyll/utils'
|
4
|
+
require_relative '../core_extensions'
|
5
|
+
|
6
|
+
module SuttyMigration
|
7
|
+
module Jekyll
|
8
|
+
module DocumentCreator
|
9
|
+
class DocumentExists < ArgumentError; end
|
10
|
+
|
11
|
+
def self.included(base)
|
12
|
+
base.class_eval do
|
13
|
+
class << self
|
14
|
+
# Creates a new document in a collection or fails if it already
|
15
|
+
# exists.
|
16
|
+
#
|
17
|
+
# @param :site [Jekyll::Site] Jekyll site
|
18
|
+
# @param :date [Time] Post date
|
19
|
+
# @param :title [String] Post title
|
20
|
+
# @param :slug [String] Post slug, slugified title if empty
|
21
|
+
# @param :collection [Jekyll::Collection,String] Collection label or collection
|
22
|
+
# @return [Jekyll::Document] A new document
|
23
|
+
def create(site:, date:, title:, collection:, slug: nil)
|
24
|
+
collection = site.collections[collection] if collection.is_a? String
|
25
|
+
slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
|
26
|
+
basename = "#{date.strftime('%F')}-#{slug}.markdown"
|
27
|
+
path = File.join(collection.directory, basename)
|
28
|
+
|
29
|
+
raise DocumentExists, "#{path} already exists" if File.exist? path
|
30
|
+
|
31
|
+
::Jekyll::Document.new(path, site: site, collection: collection).tap do |document|
|
32
|
+
collection.docs << document
|
33
|
+
document.data['title'] = title
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# Finds a document by its relative path or creates it if it
|
38
|
+
# doesn't exist. Helpful for idempotent migrations (create or
|
39
|
+
# update actions)
|
40
|
+
#
|
41
|
+
# @param :site [Jekyll::Site] Jekyll site
|
42
|
+
# @param :date [Time] Post date
|
43
|
+
# @param :title [String] Post title
|
44
|
+
# @param :slug [String] Post slug, slugified title if empty
|
45
|
+
# @param :collection [Jekyll::Collection,String] Collection label or collection
|
46
|
+
# @return [Jekyll::Document] The found document or a new one
|
47
|
+
def find_or_create(site:, date:, title:, collection:, slug: nil)
|
48
|
+
collection = site.collections[collection] if collection.is_a? String
|
49
|
+
slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
|
50
|
+
basename = "#{date.strftime('%F')}-#{slug}.markdown"
|
51
|
+
path = File.join(collection.relative_directory, basename)
|
52
|
+
|
53
|
+
return find(site: site, relative_path: path) if File.exist?(path)
|
54
|
+
|
55
|
+
create(site: site, date: date, title: title, slug: slug, collection: collection)
|
56
|
+
end
|
57
|
+
|
58
|
+
# Finds a document by its relative path
|
59
|
+
#
|
60
|
+
# @param :site [Jekyll::Site]
|
61
|
+
# @param :relative_path [String]
|
62
|
+
# @return [Jekyll::Document,Nil]
|
63
|
+
def find(site:, relative_path:)
|
64
|
+
indexed_documents_by_relative_path(site)[relative_path]
|
65
|
+
end
|
66
|
+
|
67
|
+
# Index documents by relative path for faster finding
|
68
|
+
#
|
69
|
+
# @param [Jekyll::Site]
|
70
|
+
# @return [Hash]
|
71
|
+
def indexed_documents_by_relative_path(site)
|
72
|
+
@indexed_documents_by_relative_path ||= site.documents.reduce({}) do |idx, doc|
|
73
|
+
idx.tap do |i|
|
74
|
+
i[doc.relative_path] = doc
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
::Jekyll::Document.include SuttyMigration::Jekyll::DocumentCreator
|
@@ -0,0 +1,282 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'time'
|
4
|
+
require 'securerandom'
|
5
|
+
require 'sequel'
|
6
|
+
require 'sqlite3'
|
7
|
+
require 'json'
|
8
|
+
require 'faraday'
|
9
|
+
require 'progressbar'
|
10
|
+
require 'jekyll/utils'
|
11
|
+
|
12
|
+
module SuttyMigration
|
13
|
+
# Brings posts and attachments from a SQLite3 database. You can
|
14
|
+
# convert a MySQL/MariaDB dump by using `mysql2sqlite`.
|
15
|
+
#
|
16
|
+
# It doesn't convert them into Jekyll posts but allows you to write a
|
17
|
+
# migration plugin where you can convert data by yourself. We may add
|
18
|
+
# this feature in the future.
|
19
|
+
class Wordpress
|
20
|
+
attr_reader :site, :prefix, :limit, :url, :wp, :database, :multisite
|
21
|
+
|
22
|
+
# @param :site [Jekyll::Site] Jekyll site
|
23
|
+
# @param :url [String] Wordpress site URL (must be up for downloads)
|
24
|
+
# @param :database [String] Database path, by default `_data/wordpress.sqlite3`
|
25
|
+
# @param :prefix [String] WP table prefix
|
26
|
+
# @param :limit [Integer] Page length
|
27
|
+
# @param :multisite [Boolean] Site is multisite
|
28
|
+
def initialize(site:, url:, database: nil, prefix: 'wp_', limit: 10, multisite: nil)
|
29
|
+
@site = site
|
30
|
+
@prefix = prefix.freeze
|
31
|
+
@limit = limit.freeze
|
32
|
+
@url = url.freeze
|
33
|
+
@database = database || File.join(site.source, '_data', 'wordpress.sqlite3')
|
34
|
+
@multisite = multisite
|
35
|
+
end
|
36
|
+
|
37
|
+
# Generate database connections for a multisite WP
|
38
|
+
#
|
39
|
+
# @return [Hash] { "ID" => SuttyMigration::Wordpress }
|
40
|
+
def blogs
|
41
|
+
@blogs ||= wp["select * from #{prefix}blogs"].to_a.map do |blog|
|
42
|
+
url = "https://#{blog[:domain]}#{blog[:path]}"
|
43
|
+
pfx = "#{prefix}#{blog[:blog_id]}_" if blog[:blog_id] > 1
|
44
|
+
pfx ||= prefix
|
45
|
+
|
46
|
+
[blog[:blog_id],
|
47
|
+
blog.merge(db: self.class.new(site: site, url: url, prefix: pfx, database: database, limit: limit,
|
48
|
+
multisite: self))]
|
49
|
+
end.to_h
|
50
|
+
end
|
51
|
+
|
52
|
+
def options
|
53
|
+
@options ||= wp["select option_name, option_value from #{prefix}options"].to_a.map(&:values).to_h.transform_keys(&:to_sym)
|
54
|
+
end
|
55
|
+
|
56
|
+
# Open the database.
|
57
|
+
#
|
58
|
+
# @return [Sequel::SQLite::Database]
|
59
|
+
def wp
|
60
|
+
@wp ||= Sequel.sqlite(database).tap do |db|
|
61
|
+
db.extension :pagination
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# Download all attachments. Adds the local path to them.
|
66
|
+
#
|
67
|
+
# @param :progress [Boolean] Toggle progress bar
|
68
|
+
# @return [Array] Attachment posts
|
69
|
+
def download_all(progress: true)
|
70
|
+
posts(layout: 'attachment').each do |attachment|
|
71
|
+
attachment[:front_matter]['file_path'] = download(url: attachment[:guid], progress: progress)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Downloads a file if needed, optionally showing a progress bar.
|
76
|
+
#
|
77
|
+
# @param :url [String] File URL
|
78
|
+
# @param :progress [Boolean] Toggle progress bar
|
79
|
+
# @return [String] File local path
|
80
|
+
def download(url:, progress: true)
|
81
|
+
uri = URI(url)
|
82
|
+
dest = uri.path.sub(%r{\A/}, '')
|
83
|
+
full = File.join(site.source, dest)
|
84
|
+
|
85
|
+
return dest if File.exist? full
|
86
|
+
|
87
|
+
::Jekyll.logger.info "Downloading #{dest}"
|
88
|
+
|
89
|
+
FileUtils.mkdir_p File.dirname(full)
|
90
|
+
|
91
|
+
File.open(full, 'w') do |f|
|
92
|
+
if progress
|
93
|
+
head = Faraday.head(url)
|
94
|
+
content_length = head.headers['content-length'].to_i
|
95
|
+
progress = ProgressBar.create(title: File.basename(dest), total: content_length, output: $stderr)
|
96
|
+
end
|
97
|
+
|
98
|
+
Faraday.get(url) do |req|
|
99
|
+
req.options.on_data = proc do |chunk, downloaded_bytes|
|
100
|
+
f.write chunk
|
101
|
+
|
102
|
+
if progress
|
103
|
+
progress.progress = downloaded_bytes > content_length ? content_length : downloaded_bytes
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
dest
|
110
|
+
end
|
111
|
+
|
112
|
+
# List post types
|
113
|
+
#
|
114
|
+
# @return [Array]
|
115
|
+
def layouts
|
116
|
+
@layouts ||= wp["select distinct post_type from #{prefix}posts"].to_a.map(&:values).flatten
|
117
|
+
end
|
118
|
+
|
119
|
+
# Finds all posts optionally filtering by post type. This is not
|
120
|
+
# the official Sequel syntax, but it retrieves metadata as objects
|
121
|
+
# with a single query (and a sub-query).
|
122
|
+
#
|
123
|
+
# @param :layout [String] Layout name, one of #layouts
|
124
|
+
# @param :with_meta [Boolean] Toggle metadata pulling and conversion
|
125
|
+
# @return [Array]
|
126
|
+
def posts(**options)
|
127
|
+
unless options[:layout].blank? || layouts.include?(options[:layout])
|
128
|
+
raise ArgumentError, "#{options[:layout]} must be one of #{layouts.join(', ')}"
|
129
|
+
end
|
130
|
+
|
131
|
+
wp[post_query(**options)].each_page(limit).to_a.map(&:to_a).flatten.tap do |p|
|
132
|
+
p.map do |post|
|
133
|
+
# Sequel parses dates on localtime
|
134
|
+
post[:date] = ::Jekyll::Utils.parse_date(post[:date]) unless post[:date].blank?
|
135
|
+
unless post[:last_modified_at].blank?
|
136
|
+
post[:last_modified_at] =
|
137
|
+
::Jekyll::Utils.parse_date(post[:last_modified_at])
|
138
|
+
end
|
139
|
+
|
140
|
+
post[:front_matter] =
|
141
|
+
begin
|
142
|
+
unless post[:front_matter].blank?
|
143
|
+
JSON.parse(post[:front_matter]).transform_keys(&:to_sym).transform_values do |v|
|
144
|
+
v.size == 1 ? v.first : v
|
145
|
+
end
|
146
|
+
end
|
147
|
+
rescue JSON::ParserError
|
148
|
+
{}
|
149
|
+
end
|
150
|
+
post[:terms] =
|
151
|
+
begin
|
152
|
+
unless post[:terms].blank?
|
153
|
+
JSON.parse(post[:terms]).transform_keys(&:to_sym).transform_values do |v|
|
154
|
+
v.size == 1 ? v.first : v
|
155
|
+
end
|
156
|
+
end
|
157
|
+
rescue JSON::ParserError
|
158
|
+
{}
|
159
|
+
end
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
# Brings all users.
|
165
|
+
#
|
166
|
+
# @param :with_meta [Boolean] include metadata
|
167
|
+
# @return [Array]
|
168
|
+
def users(**options)
|
169
|
+
options[:with_meta] = true unless options.key? :with_meta
|
170
|
+
|
171
|
+
wp[user_query(**options)].each_page(limit).to_a.map(&:to_a).flatten.tap do |u|
|
172
|
+
next unless options[:with_meta]
|
173
|
+
|
174
|
+
u.map do |user|
|
175
|
+
user[:meta] = JSON.parse(user[:meta]).transform_keys(&:to_sym) unless user[:meta].blank?
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
private
|
181
|
+
|
182
|
+
# Finds all users. If it's a multisite WP, we need to check the
|
183
|
+
# main table.
|
184
|
+
#
|
185
|
+
# @param :with_meta [Boolean] include metadata
|
186
|
+
# @return [String]
|
187
|
+
def user_query(with_meta: true)
|
188
|
+
pfx = multisite&.prefix || prefix
|
189
|
+
|
190
|
+
<<~EOQ
|
191
|
+
select
|
192
|
+
u.*
|
193
|
+
#{', json_group_object(m.meta_key, m.meta_value) as meta' if with_meta}
|
194
|
+
from #{pfx}users as u
|
195
|
+
#{"left join #{pfx}usermeta as m on m.user_id = u.id" if with_meta}
|
196
|
+
group by u.id
|
197
|
+
EOQ
|
198
|
+
end
|
199
|
+
|
200
|
+
# Query for posts, optionally bringing metadata as JSON objects.
|
201
|
+
#
|
202
|
+
# @param :layout [String] Layout name
|
203
|
+
# @param :with_meta [Boolean] Query metadata
|
204
|
+
# @return [String]
|
205
|
+
def post_query(layout: nil, with_meta: true)
|
206
|
+
<<~EOQ
|
207
|
+
select
|
208
|
+
p.ID as id,
|
209
|
+
strftime('%Y-%m-%d %H:%M:%S UTC', p.post_date_gmt) as date,
|
210
|
+
strftime('%Y-%m-%d %H:%M:%S UTC', p.post_modified_gmt) as last_modified_at,
|
211
|
+
p.post_author as author,
|
212
|
+
p.post_type as layout,
|
213
|
+
p.post_name as slug,
|
214
|
+
p.post_title as title,
|
215
|
+
p.post_content as content,
|
216
|
+
p.post_excerpt as excerpt,
|
217
|
+
p.post_status as status,
|
218
|
+
p.comment_status as comment_status,
|
219
|
+
p.ping_status as ping_status,
|
220
|
+
p.post_password as password,
|
221
|
+
p.to_ping as to_ping,
|
222
|
+
p.pinged as pinged,
|
223
|
+
p.post_content_filtered as content_filtered,
|
224
|
+
p.post_parent as parent,
|
225
|
+
p.guid as guid,
|
226
|
+
p.menu_order as menu_order,
|
227
|
+
p.post_mime_type as mime_type,
|
228
|
+
p.comment_count as comment_count
|
229
|
+
#{', f.front_matter as front_matter' if with_meta}
|
230
|
+
#{', t.terms as terms' if with_meta}
|
231
|
+
from #{prefix}posts as p
|
232
|
+
#{"left join (#{meta_query}) as f on f.post_id = p.ID" if with_meta}
|
233
|
+
#{"left join (#{terms_query}) as t on t.post_id = p.ID" if with_meta}
|
234
|
+
#{"where p.post_type = '#{layout}'" if layout}
|
235
|
+
group by p.ID
|
236
|
+
EOQ
|
237
|
+
end
|
238
|
+
|
239
|
+
# Recover the post meta as a JSON object with multiple values
|
240
|
+
# converted to arrays
|
241
|
+
#
|
242
|
+
# @return [String]
|
243
|
+
def meta_query
|
244
|
+
<<~EOQ
|
245
|
+
select
|
246
|
+
post_id,
|
247
|
+
json_group_object(meta_key, json(meta_values)) as front_matter
|
248
|
+
from (
|
249
|
+
select
|
250
|
+
post_id,
|
251
|
+
meta_key,
|
252
|
+
json_group_array(meta_value) as meta_values
|
253
|
+
from #{prefix}postmeta
|
254
|
+
group by post_id, meta_key
|
255
|
+
)
|
256
|
+
group by post_id
|
257
|
+
EOQ
|
258
|
+
end
|
259
|
+
|
260
|
+
# Term taxonomy query
|
261
|
+
#
|
262
|
+
# @param :layout [String] Layout name
|
263
|
+
# @return [String]
|
264
|
+
def terms_query
|
265
|
+
<<~EOQ
|
266
|
+
select
|
267
|
+
post_id,
|
268
|
+
json_group_object(taxonomy, json(terms)) as terms
|
269
|
+
from (
|
270
|
+
select
|
271
|
+
r.object_id as post_id,
|
272
|
+
tt.taxonomy,
|
273
|
+
json_group_array(t.name) as terms
|
274
|
+
from #{prefix}term_relationships as r
|
275
|
+
left join #{prefix}term_taxonomy as tt on tt.term_taxonomy_id = r.term_taxonomy_id
|
276
|
+
left join #{prefix}terms as t on t.term_id = tt.term_id
|
277
|
+
group by r.object_id)
|
278
|
+
group by post_id
|
279
|
+
EOQ
|
280
|
+
end
|
281
|
+
end
|
282
|
+
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require_relative 'wordpress_xml/post'
|
5
|
+
require_relative 'wordpress_xml/attachment'
|
6
|
+
|
7
|
+
module SuttyMigration
|
8
|
+
# Understands the XML dump generated by Wordpress and creates
|
9
|
+
# Jekyll::Documents
|
10
|
+
class WordpressXml
|
11
|
+
attr_reader :site, :file, :xml
|
12
|
+
|
13
|
+
# @param :site [Jekyll::Site] Jekyll site
|
14
|
+
# @param :file [String] File path
|
15
|
+
def initialize(site:, file:)
|
16
|
+
@site = site
|
17
|
+
@file = file
|
18
|
+
@xml = Nokogiri::XML File.read(file)
|
19
|
+
|
20
|
+
# Make things easier by removing namespaces.
|
21
|
+
xml.remove_namespaces!
|
22
|
+
end
|
23
|
+
|
24
|
+
def inspect
|
25
|
+
'#<SuttyMigration::WordpressXml>'
|
26
|
+
end
|
27
|
+
|
28
|
+
# Site URL
|
29
|
+
#
|
30
|
+
# @return [String]
|
31
|
+
def url
|
32
|
+
@url ||= attribute_value(xml, 'channel > link')
|
33
|
+
end
|
34
|
+
|
35
|
+
# Site title
|
36
|
+
#
|
37
|
+
# @return [String]
|
38
|
+
def title
|
39
|
+
@title ||= attribute_value(xml, 'channel > title')
|
40
|
+
end
|
41
|
+
|
42
|
+
# Description
|
43
|
+
#
|
44
|
+
# @return [String]
|
45
|
+
def description
|
46
|
+
@description ||= attribute_value(xml, 'channel > description')
|
47
|
+
end
|
48
|
+
|
49
|
+
# Language
|
50
|
+
#
|
51
|
+
# TODO: Migrate multilanguage sites.
|
52
|
+
#
|
53
|
+
# @return [String]
|
54
|
+
def language
|
55
|
+
@language ||= attribute_value(xml, 'channel > language')
|
56
|
+
end
|
57
|
+
|
58
|
+
# Authors with attributes, indexed by author email.
|
59
|
+
#
|
60
|
+
# @return [Hash]
|
61
|
+
def authors
|
62
|
+
@authors ||= xml.css('channel > author').map do |author|
|
63
|
+
{
|
64
|
+
attribute_value(author, 'author_email') => {
|
65
|
+
id: attribute_value(author, 'author_id').to_i,
|
66
|
+
display_name: attribute_value(author, 'author_display_name'),
|
67
|
+
first_name: attribute_value(author, 'author_first_name'),
|
68
|
+
last_name: attribute_value(author, 'author_last_name'),
|
69
|
+
email: attribute_value(author, 'author_email')
|
70
|
+
|
71
|
+
}
|
72
|
+
}
|
73
|
+
end.reduce(&:merge)
|
74
|
+
end
|
75
|
+
|
76
|
+
# Categories with attributes, indexed by slug ("nicename")
|
77
|
+
#
|
78
|
+
# @return [Hash]
|
79
|
+
def categories
|
80
|
+
@categories ||= xml.css('channel > category').map do |category|
|
81
|
+
{
|
82
|
+
attribute_value(category, 'category_nicename') => {
|
83
|
+
id: attribute_value(category, 'term_id').to_i,
|
84
|
+
title: attribute_value(category, 'cat_name'),
|
85
|
+
parent: attribute_value(category, 'category_parent'),
|
86
|
+
slug: attribute_value(category, 'category_nicename')
|
87
|
+
}
|
88
|
+
}
|
89
|
+
end.reduce(&:merge)
|
90
|
+
end
|
91
|
+
|
92
|
+
# Tags with attributes, indexed by slug
|
93
|
+
#
|
94
|
+
# @return [Hash]
|
95
|
+
def tags
|
96
|
+
@tags ||= xml.css('channel > tag').map do |tag|
|
97
|
+
{
|
98
|
+
attribute_value(tag, 'tag_slug') => {
|
99
|
+
id: attribute_value(tag, 'term_id').to_i,
|
100
|
+
title: attribute_value(tag, 'tag_name'),
|
101
|
+
slug: attribute_value(tag, 'tag_slug')
|
102
|
+
}
|
103
|
+
}
|
104
|
+
end.reduce(&:merge)
|
105
|
+
end
|
106
|
+
|
107
|
+
# Posts, indexed by ID
|
108
|
+
#
|
109
|
+
# @return [Hash]
|
110
|
+
def posts
|
111
|
+
@posts ||= items_find_by('post_type', 'post').map do |post|
|
112
|
+
{ attribute_value(post, 'post_id').to_i => Post.new(wordpress: self, item: post) }
|
113
|
+
end.reduce(&:merge)
|
114
|
+
end
|
115
|
+
|
116
|
+
# Pages, indexed by ID
|
117
|
+
#
|
118
|
+
# @return [Hash]
|
119
|
+
def pages
|
120
|
+
@pages ||= items_find_by('post_type', 'page').map do |page|
|
121
|
+
{ attribute_value(page, 'post_id').to_i => Post.new(wordpress: self, item: page) }
|
122
|
+
end.reduce(&:merge)
|
123
|
+
end
|
124
|
+
|
125
|
+
# Attachments, indexed by ID
|
126
|
+
#
|
127
|
+
# @return [Hash]
|
128
|
+
def attachments
|
129
|
+
@attachments ||= items_find_by('post_type', 'attachment').map do |attachment|
|
130
|
+
{ attribute_value(attachment, 'post_id').to_i => Attachment.new(wordpress: self, item: attachment) }
|
131
|
+
end.reduce(&:merge)
|
132
|
+
end
|
133
|
+
|
134
|
+
# Find items by attribute and value
|
135
|
+
#
|
136
|
+
# @param [String] Attribute name
|
137
|
+
# @param [String] Attribute value
|
138
|
+
# @return [Nokogiri::NodeSet]
|
139
|
+
def items_find_by(attribute, value)
|
140
|
+
xml.css('channel > item').select do |item|
|
141
|
+
attribute_value(item, attribute) == value
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
# Get element's attribute value
|
146
|
+
#
|
147
|
+
# @param [Nokogiri::XML::Element]
|
148
|
+
# @param [String]
|
149
|
+
# @return [String]
|
150
|
+
def attribute_value(element, attribute)
|
151
|
+
element.at_css(attribute).text
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'post'
|
4
|
+
require 'php-serialize'
|
5
|
+
require 'faraday'
|
6
|
+
require 'progressbar'
|
7
|
+
|
8
|
+
module SuttyMigration
|
9
|
+
class WordpressXml
|
10
|
+
# Represents an attachment or uploaded file.
class Attachment < Post
  # File URL
  #
  # @return [String]
  def attachment_url
    @attachment_url ||= attribute_value 'attachment_url'
  end

  # File destination: the path component of the attachment URL
  # without its leading slash, i.e. a relative path.
  #
  # NOTE(review): the path is taken from the URL as-is (no
  # normalization of ".." segments) -- confirm the export is trusted.
  #
  # @return [String]
  def dest
    @dest ||= URI(attachment_url).path.sub(%r{\A/}, '')
  end

  # Metadata, with file information as a Hash
  #
  # The parent's Hash is memoized and modified in place, so the
  # result is cached here too; otherwise a second call would try to
  # unserialize the already unserialized value.
  #
  # @return [Hash]
  def meta
    @attachment_meta ||= super.tap do |m|
      raw = m['_wp_attachment_metadata']

      # Missing keys (nil) and duplicated keys (Array) are left alone
      m['_wp_attachment_metadata'] = PHP.unserialize(raw) if raw.is_a? String
    end
  end

  # Download the file if it doesn't exist.  Optionally show a
  # progress bar.
  #
  # The file is written in binary mode and removed again when the
  # request fails or doesn't answer with a successful status, so a
  # broken download is retried on the next run instead of being
  # mistaken for a finished one.
  #
  # @param :progress [Boolean]
  # @return [Boolean] true when the file is available locally
  def download(progress: true)
    return true if File.exist? dest

    ::Jekyll.logger.info "Downloading #{dest}"

    FileUtils.mkdir_p File.dirname(dest)

    content_length = 0
    bar = nil

    if progress
      # Ask for the size first so the bar has a total
      head = Faraday.head(attachment_url)
      content_length = head.headers['content-length'].to_i
      bar = ProgressBar.create(title: File.basename(dest), total: content_length, output: $stderr)
    end

    complete = false

    begin
      response = File.open(dest, 'wb') do |f|
        Faraday.get(attachment_url) do |req|
          req.options.on_data = proc do |chunk, downloaded_bytes|
            f.write chunk

            # The bar can't go past its total
            bar.progress = [downloaded_bytes, content_length].min if bar
          end
        end
      end

      complete = response.success?
    ensure
      # Never leave a partial or error-page file behind
      FileUtils.rm_f dest unless complete
    end

    complete
  end
end
|
68
|
+
end
|
69
|
+
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'wordpress_formatting/wpautop'
|
4
|
+
require 'jekyll/utils'
|
5
|
+
|
6
|
+
module SuttyMigration
|
7
|
+
class WordpressXml
|
8
|
+
# Represents a WordPress post
class Post
  attr_reader :wordpress, :item

  # @param :wordpress [SuttyMigration::WordpressXml]
  # @param :item [Nokogiri::XML::Element]
  def initialize(wordpress:, item:)
    @wordpress = wordpress
    @item = item
  end

  # Short representation, so the whole XML item isn't dumped.
  #
  # @return [String]
  def inspect
    "#<SuttyMigration::WordpressXml::Post title=\"#{title}\">"
  end

  # Post ID
  #
  # @return [Integer]
  def id
    @id ||= attribute_value('post_id').to_i
  end

  # Permalink.  The post link with the site URL removed.
  #
  # @return [String, nil] nil when the item has no link
  def permalink
    @permalink ||= attribute_value('link')&.sub(wordpress.url, '')
  end

  # Title
  #
  # @return [String]
  def title
    @title ||= attribute_value('title')
  end

  # Description
  #
  # @return [String]
  def description
    @description ||= attribute_value('description')
  end

  # Slug ("post name")
  #
  # @return [String]
  def slug
    @slug ||= attribute_value('post_name')
  end

  # Publication date.
  #
  # WordPress can store this date in three different fields and
  # sometimes they come empty or invalid.  The first one that can be
  # parsed wins.
  #
  # @return [Time, nil] nil when no field holds a valid date
  def date
    @date ||= %w[pubDate post_date_gmt post_date].map do |date_attr|
      ::Jekyll::Utils.parse_date attribute_value(date_attr)
    rescue StandardError
      # Unparseable or missing, try the next field
      nil
    end.compact.first
  end

  # Modification date.
  #
  # @return [Time, nil] nil when the item has no modification date
  def last_modified_at
    @last_modified_at ||= begin
      modified = attribute_value('post_modified_gmt')

      modified && ::Jekyll::Utils.parse_date(modified)
    end
  end

  # Content as HTML, with site URL removed.
  #
  # @return [String]
  def content
    @content ||= WordpressFormatting::Wpautop.wpautop(attribute_value('encoded').to_s).gsub(
      # The URL is escaped so its dots only match literal dots
      / (href|src)="#{Regexp.escape(wordpress.url)}/, ' \\1="'
    )
  end

  # Author attributes, looked up by creator on the site's authors.
  #
  # @return [Hash]
  def author
    @author ||= wordpress.authors[attribute_value('creator')]
  end

  # Post password.  Use with jekyll-crypto.
  #
  # @return [String]
  def password
    @password ||= attribute_value 'post_password'
  end

  # Tags with attributes, one entry per "post_tag" category element.
  #
  # @return [Array]
  def tags
    @tags ||= item.css('category').select do |c|
      c[:domain] == 'post_tag'
    end.map do |c|
      wordpress.tags[c[:nicename]]
    end
  end

  # Categories with attributes, one entry per "category" element.
  #
  # @return [Array]
  def categories
    @categories ||= item.css('category').select do |c|
      c[:domain] == 'category'
    end.map do |c|
      wordpress.categories[c[:nicename]]
    end
  end

  # Metadata.  Plugins store useful information here.  Duplicated
  # keys are returned as an Array of values.
  #
  # @return [Hash]
  def meta
    @meta ||= {}.tap do |meta|
      item.css('postmeta').each do |m|
        key = m.css('meta_key').text
        value = m.css('meta_value').text

        case meta[key]
        when nil then meta[key] = value
        when String then meta[key] = [meta[key], value]
        when Array then meta[key] << value
        end
      end
    end
  end

  # Order.  Higher are sorted on top by jekyll-order.
  #
  # @return [Integer] value of "is_sticky", 0 when it's missing
  def order
    @order ||= attribute_value('is_sticky').to_i
  end

  # Publication status
  #
  # @return [Boolean] true when the status is "publish"
  def published?
    @published ||= attribute_value('status') == 'publish'
  end

  # Publication status
  #
  # @return [Boolean] true when the status is "draft"
  def draft?
    @draft ||= attribute_value('status') == 'draft'
  end

  # Get a value from the attribute
  #
  # @param key [String] Name (CSS selector) of the item's child
  # @return [String, nil] nil when the item has no such child
  def attribute_value(key)
    item.at_css(key)&.text
  end
end
|
170
|
+
end
|
171
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sutty-migration
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- f
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-08-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jekyll
|
@@ -52,6 +52,118 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '1.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: faraday
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.4'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.4'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: progressbar
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.11'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.11'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: sqlite3
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.4'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.4'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: sequel
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '5.45'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '5.45'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: wordpress-formatting
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 0.1.0
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 0.1.0
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: nokogiri
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 1.12.0
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 1.12.0
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: php-serialize
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: 1.3.0
|
146
|
+
type: :runtime
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: 1.3.0
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: pry
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ">="
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
55
167
|
description: Takes datafiles and converts them into posts
|
56
168
|
email:
|
57
169
|
- f@sutty.nl
|
@@ -64,7 +176,13 @@ files:
|
|
64
176
|
- LICENSE.txt
|
65
177
|
- README.md
|
66
178
|
- lib/sutty-migration.rb
|
67
|
-
- lib/
|
179
|
+
- lib/sutty_migration/core_extensions.rb
|
180
|
+
- lib/sutty_migration/data.rb
|
181
|
+
- lib/sutty_migration/jekyll/document_creator.rb
|
182
|
+
- lib/sutty_migration/wordpress.rb
|
183
|
+
- lib/sutty_migration/wordpress_xml.rb
|
184
|
+
- lib/sutty_migration/wordpress_xml/attachment.rb
|
185
|
+
- lib/sutty_migration/wordpress_xml/post.rb
|
68
186
|
homepage: https://0xacab.org/sutty/jekyll/sutty-migration
|
69
187
|
licenses:
|
70
188
|
- GPL-3.0
|
data/lib/wordpress.rb
DELETED
@@ -1,174 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# Debug
|
4
|
-
require 'pry'
|
5
|
-
# Generar UUIDs
|
6
|
-
require 'securerandom'
|
7
|
-
# Traer resultados de la base de datos
|
8
|
-
require 'sequel'
|
9
|
-
require 'sqlite3'
|
10
|
-
require 'json'
|
11
|
-
# Limpieza de contenido
|
12
|
-
require 'loofah'
|
13
|
-
require 'rails/html/scrubbers'
|
14
|
-
require 'rails/html/sanitizer'
|
15
|
-
require 'reverse_markdown'
|
16
|
-
# Descargar archivos
|
17
|
-
require 'faraday'
|
18
|
-
|
19
|
-
# Reads posts and their taxonomy terms from a SQLite3 copy of a
# WordPress database and downloads uploaded files from the live site.
class Wordpress
  attr_reader :site, :prefix, :limit, :url

  # @param site [Jekyll::Site] its source directory holds the database
  # @param url [String] WordPress site URL, used to download uploads
  # @param prefix [String] Table name prefix
  # @param limit [Integer] Page size when fetching posts
  def initialize(site:, url:, prefix: 'wp_', limit: 10)
    @site = site
    @prefix = prefix.freeze
    @limit = limit.freeze
    @url = url.freeze

    # Connect to the database
    @wp = Sequel.sqlite(File.join(site.source, '_data', 'wordpress', 'post.sqlite3'))
    # JSON functions use a lot of CPU, so records are fetched a few at
    # a time.
    @wp.extension :pagination
  end

  # Downloads an uploaded file into the site source, unless it's
  # already there.
  #
  # @param file [String] Path relative to wp-content/uploads/
  # @return [String] Destination path, relative to the site source
  def download(file)
    dest = 'wp-content/uploads/' + file
    full = File.join(site.source, dest)

    return dest if File.exist? full

    Jekyll.logger.info "Downloading #{dest}"

    FileUtils.mkdir_p File.dirname(full)

    complete = false

    begin
      # Binary mode: uploads are mostly images
      File.open(full, 'wb') do |f|
        Faraday.get(url + '/' + dest) do |req|
          req.options.on_data = proc do |chunk, _|
            f.write chunk
          end
        end
      end

      complete = true
    ensure
      # An interrupted download would otherwise count as finished on
      # the next run
      FileUtils.rm_f full unless complete
    end

    dest
  end

  # Gets all posts, optionally filtering by post type.  This isn't the
  # official Sequel way, but the JSON options wouldn't be supported by
  # it.
  #
  # @param layout [String, nil] Post type
  # @return [Enumerator] pages of up to `limit` posts
  def posts(layout: nil)
    query = post_query
    # The value is literalized by the database so quotes can't break
    # out of the string
    query += " where post_type = #{@wp.literal(layout.to_s)}" if layout
    query += ' group by posts.ID'

    @wp[query].each_page(limit)
  end

  # Gets the taxonomy terms (tags, categories...) related to a post.
  #
  # @param id [Integer] Post ID
  # @return [Array<Hash>]
  def meta(id:)
    @wp[meta_query(id: id)].to_a
  end

  private

  # Query for all posts with their metadata as a JSON object.
  # json_objectagg requires MariaDB 10.5, json_group_object is its
  # SQLite equivalent.
  def post_query
    @post_query ||= <<~EOQ
      select ID as id,
        post_title as title,
        post_name as slug,
        post_type as layout,
        strftime('%Y-%m-%d', post_date) as date,
        post_status as status,
        post_content as content,
        json_group_object(meta_key, meta_value) as data
      from #{prefix}posts as posts
      left join #{prefix}postmeta as frontmatter
        on posts.ID = frontmatter.post_id
    EOQ
  end

  # Query for the terms related to a post.  The ID is literalized by
  # the database instead of being interpolated raw.
  def meta_query(id:)
    <<~EOQ
      SELECT
        terms.name AS `name`,
        ttax.taxonomy AS `type`,
        ttax.parent AS `parent`,
        ttax.term_id AS `id`
      FROM
        #{prefix}terms AS `terms`,
        #{prefix}term_relationships AS `trels`,
        #{prefix}term_taxonomy AS `ttax`
      WHERE
        trels.object_id = #{@wp.literal(id.to_s)} AND
        trels.term_taxonomy_id = ttax.term_taxonomy_id AND
        terms.term_id = ttax.term_id
    EOQ
  end
end
|
109
|
-
|
110
|
-
# Before the site is generated, read every article from the database
# and generate it locally.
Jekyll::Hooks.register :site, :post_read do |site|
  wp = Wordpress.new(site: site,
                     url: site.config.dig('wordpress', 'url'),
                     prefix: site.config.dig('wordpress', 'prefix'))

  collection = site.collections['posts']
  sanitizer = Rails::Html::SafeListSanitizer.new

  # Fetch every uploaded file and download it, keeping a map of
  # attachment IDs to destination files.  Attachments without a file
  # are dropped inside each page, otherwise their nils would reach
  # `to_h` and make it raise.
  attachments = wp.posts(layout: 'attachment').flat_map do |page|
    page.map do |attachment|
      attachment[:data] = JSON.parse(attachment[:data]) unless attachment[:data].nil?
      file = attachment.dig(:data, '_wp_attached_file')

      next unless file

      [attachment[:id], wp.download(file)]
    end.compact
  end.to_h

  %w[post page].each do |type|
    wp.posts(layout: type).each do |page|
      page.each do |post|
        # Convert the extra data into a Hash
        post[:data] = JSON.parse(post[:data]) unless post[:data].nil?
        post[:slug] = Jekyll::Utils.slugify(post[:title], mode: 'latin') if post[:slug].to_s.empty?
        post[:meta] = wp.meta id: post[:id]

        path = File.join(site.source, '_posts', post.slice(:date, :slug).values.join('-') + '.markdown')

        if File.exist? path
          Jekyll.logger.info "#{path} ya fue migrado, actualizando"

          doc = site.documents.find do |d|
            d['id'] == post[:id]
          end

          # The file exists but belongs to another document; skip it
          # rather than failing on nil or overwriting it.
          unless doc
            Jekyll.logger.warn "#{path} exists but no document has id #{post[:id]}, skipping"
            next
          end
        else
          # Create a new post and add it to the collection
          collection.docs << doc = Jekyll::Document.new(path, site: site, collection: collection)
          doc.data['uuid'] = SecureRandom.uuid
        end

        thumbnail = post.dig(:data, '_thumbnail_id')&.to_i

        doc.data['layout'] = type
        doc.data['title'] = post[:title]
        doc.data['draft'] = post[:status] != 'publish'
        doc.data['id'] = post[:id]
        doc.data['date'] = Jekyll::Utils.parse_date(post[:date])
        doc.data['tags'] = post[:meta].select { |k| k[:type] == 'post_tag' }.map { |k| k[:name] }
        doc.data['categories'] = post[:meta].select { |k| k[:type] == 'category' }.map { |k| k[:name] }
        doc.data['image'] = attachments[thumbnail] if thumbnail

        # Sanitized HTML is stored as Markdown
        doc.content = ReverseMarkdown.convert(sanitizer.sanitize(post[:content]))
        doc.save
      end
    end
  end
end
|