sutty-migration 0.1.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +48 -0
- data/lib/sutty-migration.rb +1 -67
- data/lib/sutty_migration/core_extensions.rb +26 -0
- data/lib/sutty_migration/data.rb +77 -0
- data/lib/sutty_migration/jekyll/document_creator.rb +41 -0
- data/lib/sutty_migration/wordpress.rb +255 -0
- metadata +76 -3
- data/lib/wordpress.rb +0 -174
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f5a175d5ab6dd98eb4bed7ac987961ce9aaae15355c3eeb8e191805a6192ae34
|
|
4
|
+
data.tar.gz: f677b5f7945ba06403239197798d1b37eaea975c2057734c16484a0c75204f48
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9cb1bf01c37e40036dffdd19b14b78fbdf538af45625867eb6cbd45ab66bcb468a91b11a13ab86ca83e2d8692235ed0c6d4d919186d3a8713f9c81723c7978a6
|
|
7
|
+
data.tar.gz: a3fc41c535e1a028526beb8904be1a72b2eee49473e4036059e7b0d4bbeff9b8670bf20b564636709605200aeede0c928bf9c9613146339a01803d396c570c05
|
data/README.md
CHANGED
|
@@ -66,6 +66,54 @@ To start migration just build your site:
|
|
|
66
66
|
bundle exec jekyll build
|
|
67
67
|
```
|
|
68
68
|
|
|
69
|
+
**Tip:** Files can also be JSON, TSV and YAML, since they're all
|
|
70
|
+
supported by Jekyll.
|
|
71
|
+
|
|
72
|
+
### Wordpress
|
|
73
|
+
|
|
74
|
+
Instead of requiring you to install and configure MariaDB/MySQL, you can
|
|
75
|
+
convert the database into SQLite3 like this:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
git clone https://0xacab.org/sutty/mysql2sqlite.git
|
|
79
|
+
cd mysql2sqlite
|
|
80
|
+
./mysql2sqlite /path/to/database/dump.sql |
|
|
81
|
+
sed -re "s/, 0x([0-9a-f]+),/, X'\1',/i" |
|
|
82
|
+
sqlite3 wordpress.sqlite3
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
It will probably show some errors.
|
|
86
|
+
|
|
87
|
+
Note the `sed` command is required to convert hexadecimal values into
|
|
88
|
+
SQLite syntax, since `mysql2sqlite` doesn't support this yet.
|
|
89
|
+
|
|
90
|
+
Wordpress websites can include lots of posts and metadata, depending on
|
|
91
|
+
the number of plugins installed. We don't have an official way of
|
|
92
|
+
dumping everything into Jekyll, because you will probably want to move
|
|
93
|
+
things around. You can write a plugin like this:
|
|
94
|
+
|
|
95
|
+
```ruby
|
|
96
|
+
# _plugins/wordpress.rb
|
|
97
|
+
# frozen_string_literal: true
|
|
98
|
+
|
|
99
|
+
require 'sutty_migration/wordpress'
|
|
100
|
+
require 'sutty_migration/jekyll/document_creator'
|
|
101
|
+
require 'jekyll-write-and-commit-changes'
|
|
102
|
+
|
|
103
|
+
Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
|
|
104
|
+
wp = SuttyMigration::Wordpress.new(site: site, database: 'wordpress.sqlite3', prefix: 'wp_', url: 'https://wordpre.ss')
|
|
105
|
+
|
|
106
|
+
# Download all files
|
|
107
|
+
wp.download_all
|
|
108
|
+
|
|
109
|
+
wp.posts(layout: 'post').each do |post|
|
|
110
|
+
doc = Jekyll::Document.create(site: site, title: post[:post_title], date: post[:post_date], collection: 'posts')
|
|
111
|
+
doc.content = post[:content]
|
|
112
|
+
doc.save
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
```
|
|
116
|
+
|
|
69
117
|
## Contributing
|
|
70
118
|
|
|
71
119
|
Bug reports and pull requests are welcome on 0xacab.org at
|
data/lib/sutty-migration.rb
CHANGED
|
@@ -1,69 +1,3 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
require 'fast_blank'
|
|
5
|
-
require 'jekyll-write-and-commit-changes'
|
|
6
|
-
|
|
7
|
-
Jekyll::Hooks.register :site, :post_read do |site|
|
|
8
|
-
documents = site.documents
|
|
9
|
-
|
|
10
|
-
site.data['layouts']&.each do |name, layout|
|
|
11
|
-
site.data.dig('migration', name)&.each do |row|
|
|
12
|
-
row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
|
|
13
|
-
|
|
14
|
-
document = documents.find do |doc|
|
|
15
|
-
doc.data['id'] == row['id']
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
document ||=
|
|
19
|
-
begin
|
|
20
|
-
base = "#{row['date'] || Date.today.to_s}-#{Jekyll::Utils.slugify(row['title'], mode: 'latin')}.markdown"
|
|
21
|
-
path = File.join(site.source, '_posts', base)
|
|
22
|
-
|
|
23
|
-
raise ArgumentError, "Row #{row['id']} duplicates file #{base}" if File.exist? path
|
|
24
|
-
|
|
25
|
-
doc = Jekyll::Document.new(path, site: site, collection: site.collections['posts'])
|
|
26
|
-
site.collections['posts'] << doc
|
|
27
|
-
|
|
28
|
-
doc
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
row.each do |attribute, value|
|
|
32
|
-
row[attribute] =
|
|
33
|
-
case layout.dig(attribute, 'type')
|
|
34
|
-
when 'string' then value
|
|
35
|
-
when 'text' then value
|
|
36
|
-
when 'tel' then value
|
|
37
|
-
when 'color' then value # TODO: validar
|
|
38
|
-
when 'date' then Jekyll::Utils.parse_date(value)
|
|
39
|
-
when 'email' then value # TODO: validar
|
|
40
|
-
when 'url' then value # TODO: validar
|
|
41
|
-
when 'content' then value
|
|
42
|
-
when 'markdown_content' then value
|
|
43
|
-
when 'markdown' then value
|
|
44
|
-
when 'number' then value.to_i
|
|
45
|
-
when 'order' then value.to_i
|
|
46
|
-
when 'boolean' then !value.strip.empty?
|
|
47
|
-
when 'array' then value.split(',').map(&:strip)
|
|
48
|
-
# TODO: procesar los valores en base a los valores predefinidos
|
|
49
|
-
when 'predefined_array' then value.split(',').map(&:strip)
|
|
50
|
-
when 'image' then { 'path' => value, 'description' => '' }
|
|
51
|
-
when 'file' then { 'path' => value, 'description' => '' }
|
|
52
|
-
when 'geo' then %w[lat lng].zip(value.split(',', 2).map(&:to_f)).to_h
|
|
53
|
-
when 'belongs_to' then value
|
|
54
|
-
when 'has_many' then value.split(',').map(&:strip)
|
|
55
|
-
when 'has_and_belongs_to_many' then value.split(',').map(&:strip)
|
|
56
|
-
when 'related_posts' then value.split(',').map(&:strip)
|
|
57
|
-
when 'locales' then value.split(',').map(&:strip)
|
|
58
|
-
else value
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
document.data['uuid'] ||= SecureRandom.uuid
|
|
63
|
-
document.content = row.delete('content')
|
|
64
|
-
|
|
65
|
-
document.data.merge! row
|
|
66
|
-
document.save
|
|
67
|
-
end
|
|
68
|
-
end
|
|
69
|
-
end
|
|
3
|
+
require_relative 'sutty_migration/data'
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Expandir String para poder verificar si está vacía
|
|
4
|
+
require 'fast_blank'
|
|
5
|
+
|
|
6
|
+
# Verificar que los valores nulos estén vacíos
|
|
7
|
+
class NilClass
|
|
8
|
+
def blank?
|
|
9
|
+
true
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def present?
|
|
13
|
+
false
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Verificar que una fecha está vacía
|
|
18
|
+
class Time
|
|
19
|
+
def blank?
|
|
20
|
+
false
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def present?
|
|
24
|
+
true
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'securerandom'
|
|
4
|
+
require_relative 'core_extensions'
|
|
5
|
+
require_relative 'jekyll/document_creator'
|
|
6
|
+
|
|
7
|
+
# Registers a plugin for converting CSV files into posts following
|
|
8
|
+
# Sutty's layout definition.
|
|
9
|
+
#
|
|
10
|
+
# If jekyll-write-and-commit-changes is enabled, documents will be saved
|
|
11
|
+
# on disk and committed if the build command is run with
|
|
12
|
+
# JEKYLL_ENV=production
|
|
13
|
+
Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
|
|
14
|
+
documents = site.documents
|
|
15
|
+
|
|
16
|
+
site.data['layouts']&.each do |name, layout|
|
|
17
|
+
site.data.dig('migration', name)&.each do |row|
|
|
18
|
+
row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
|
|
19
|
+
row['date'] ||= Time.now
|
|
20
|
+
|
|
21
|
+
unless row['id'].blank?
|
|
22
|
+
document = documents.find do |doc|
|
|
23
|
+
doc.data['id'] == row['id']
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
document ||= Jekyll::Document.create(site: site, collection: 'posts', **row.slice(*%w[date slug title]).transform_keys(&:to_sym))
|
|
28
|
+
|
|
29
|
+
row.each do |attribute, value|
|
|
30
|
+
next unless value.blank?
|
|
31
|
+
|
|
32
|
+
row[attribute] =
|
|
33
|
+
case layout.dig(attribute, 'type')
|
|
34
|
+
when 'string' then value
|
|
35
|
+
when 'text' then value
|
|
36
|
+
when 'tel' then value
|
|
37
|
+
# TODO: validate
|
|
38
|
+
when 'color' then value
|
|
39
|
+
when 'date' then Jekyll::Utils.parse_date(value)
|
|
40
|
+
# TODO: validate
|
|
41
|
+
when 'email' then value
|
|
42
|
+
# TODO: validate
|
|
43
|
+
when 'url' then value
|
|
44
|
+
when 'content' then value
|
|
45
|
+
when 'markdown_content' then value
|
|
46
|
+
when 'markdown' then value
|
|
47
|
+
when 'number' then value.to_i
|
|
48
|
+
when 'order' then value.to_i
|
|
49
|
+
when 'boolean' then !value.strip.empty?
|
|
50
|
+
when 'array' then value.split(',').map(&:strip)
|
|
51
|
+
# TODO: process values from the default array
|
|
52
|
+
when 'predefined_array' then value.split(',').map(&:strip)
|
|
53
|
+
when 'image' then { 'path' => value, 'description' => '' }
|
|
54
|
+
when 'file' then { 'path' => value, 'description' => '' }
|
|
55
|
+
when 'geo' then %w[lat lng].zip(value.split(',', 2).map(&:to_f)).to_h
|
|
56
|
+
when 'belongs_to' then value
|
|
57
|
+
when 'has_many' then value.split(',').map(&:strip)
|
|
58
|
+
when 'has_and_belongs_to_many' then value.split(',').map(&:strip)
|
|
59
|
+
when 'related_posts' then value.split(',').map(&:strip)
|
|
60
|
+
when 'locales' then value.split(',').map(&:strip)
|
|
61
|
+
else value
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
document.data['uuid'] ||= SecureRandom.uuid
|
|
66
|
+
document.content = row.delete('content')
|
|
67
|
+
|
|
68
|
+
document.data.merge! row
|
|
69
|
+
document.save if document.respond_to? :save
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
next unless site.respond_to?(:repository)
|
|
74
|
+
next unless ENV['JEKYLL_ENV'] == 'production'
|
|
75
|
+
|
|
76
|
+
site.repository.commit 'CSV Migration'
|
|
77
|
+
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'jekyll/utils'
|
|
4
|
+
require_relative '../core_extensions'
|
|
5
|
+
|
|
6
|
+
module SuttyMigration
|
|
7
|
+
module Jekyll
|
|
8
|
+
module DocumentCreator
|
|
9
|
+
class DocumentExists < ArgumentError; end
|
|
10
|
+
def self.included(base)
|
|
11
|
+
base.class_eval do
|
|
12
|
+
|
|
13
|
+
# Creates a new document in a collection or fails if it already
|
|
14
|
+
# exists.
|
|
15
|
+
#
|
|
16
|
+
# @param :site [Jekyll::Site] Jekyll site
|
|
17
|
+
# @param :date [Time] Post date
|
|
18
|
+
# @param :title [String] Post title
|
|
19
|
+
# @param :slug [String] Post slug, slugified title if empty
|
|
20
|
+
# @param :collection [Jekyll::Collection,String] Collection label or collection
|
|
21
|
+
# @return [Jekyll::Document] A new document
|
|
22
|
+
def self.create(site:, date:, title:, slug: nil, collection:)
|
|
23
|
+
collection = site.collections[collection] if collection.is_a? String
|
|
24
|
+
slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
|
|
25
|
+
basename = "#{date.strftime('%F')}-#{slug}.markdown"
|
|
26
|
+
path = File.join(collection.directory, basename)
|
|
27
|
+
|
|
28
|
+
raise DocumentExists, "#{path} already exists" if File.exist? path
|
|
29
|
+
|
|
30
|
+
::Jekyll::Document.new(path, site: site, collection: collection).tap do |document|
|
|
31
|
+
collection.docs << document
|
|
32
|
+
document.data['title'] = title
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
::Jekyll::Document.include SuttyMigration::Jekyll::DocumentCreator
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'time'
|
|
4
|
+
require 'securerandom'
|
|
5
|
+
require 'sequel'
|
|
6
|
+
require 'sqlite3'
|
|
7
|
+
require 'json'
|
|
8
|
+
require 'faraday'
|
|
9
|
+
require 'progressbar'
|
|
10
|
+
require 'jekyll/utils'
|
|
11
|
+
|
|
12
|
+
module SuttyMigration
|
|
13
|
+
# Brings posts and attachments from a SQLite3 database. You can
|
|
14
|
+
# convert a MySQL/MariaDB dump by using `mysql2sqlite`.
|
|
15
|
+
#
|
|
16
|
+
# It doesn't convert them into Jekyll posts but allows you to write a
|
|
17
|
+
# migration plugin where you can convert data by yourself. We may add
|
|
18
|
+
# this feature in the future.
|
|
19
|
+
class Wordpress
|
|
20
|
+
attr_reader :site, :prefix, :limit, :url, :wp, :database, :multisite
|
|
21
|
+
|
|
22
|
+
# @param :site [Jekyll::Site] Jekyll site
|
|
23
|
+
# @param :url [String] Wordpress site URL (must be up for downloads)
|
|
24
|
+
# @param :database [String] Database path, by default `_data/wordpress.sqlite3`
|
|
25
|
+
# @param :prefix [String] WP table prefix
|
|
26
|
+
# @param :limit [Integer] Page length
|
|
27
|
+
# @param :multisite [Boolean] Site is multisite
|
|
28
|
+
def initialize(site:, url:, database: nil, prefix: 'wp_', limit: 10, multisite: nil)
|
|
29
|
+
@site = site
|
|
30
|
+
@prefix = prefix.freeze
|
|
31
|
+
@limit = limit.freeze
|
|
32
|
+
@url = url.freeze
|
|
33
|
+
@database = database || File.join(site.source, '_data', 'wordpress.sqlite3')
|
|
34
|
+
@multisite = multisite
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Generate database connections for a multisite WP
|
|
38
|
+
#
|
|
39
|
+
# @return [Hash] { "ID" => SuttyMigration::Wordpress }
|
|
40
|
+
def blogs
|
|
41
|
+
@blogs ||= wp["select * from #{prefix}blogs"].to_a.map do |blog|
|
|
42
|
+
url = "https://#{blog[:domain]}#{blog[:path]}"
|
|
43
|
+
pfx = "#{prefix}#{blog[:blog_id]}_" if blog[:blog_id] > 1
|
|
44
|
+
pfx ||= prefix
|
|
45
|
+
|
|
46
|
+
[ blog[:blog_id], blog.merge(db: self.class.new(site: site, url: url, prefix: pfx, database: database, limit: limit, multisite: self)) ]
|
|
47
|
+
end.to_h
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def options
|
|
51
|
+
@options ||= wp["select option_name, option_value from #{prefix}options"].to_a.map(&:values).to_h.transform_keys(&:to_sym)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Open the database.
|
|
55
|
+
#
|
|
56
|
+
# @return [Sequel::SQLite::Database]
|
|
57
|
+
def wp
|
|
58
|
+
@wp ||= Sequel.sqlite(database).tap do |db|
|
|
59
|
+
db.extension :pagination
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Download all attachments. Adds the local path to them.
|
|
64
|
+
#
|
|
65
|
+
# @param :progress [Boolean] Toggle progress bar
|
|
66
|
+
# @return [Nil]
|
|
67
|
+
def download_all(progress: true)
|
|
68
|
+
posts(layout: 'attachment').each do |attachment|
|
|
69
|
+
attachment[:front_matter]['file_path'] = download(url: attachment[:guid], progress: progress)
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# Downloads a file if needed, optionally showing a progress bar.
|
|
74
|
+
#
|
|
75
|
+
# @param :url [String] File URL
|
|
76
|
+
# @param :progress [Boolean] Toggle progress bar
|
|
77
|
+
# @return [String] File local path
|
|
78
|
+
def download(url:, progress: true)
|
|
79
|
+
uri = URI(url)
|
|
80
|
+
dest = uri.path.sub(%r{\A/}, '')
|
|
81
|
+
full = File.join(site.source, dest)
|
|
82
|
+
|
|
83
|
+
return dest if File.exist? full
|
|
84
|
+
|
|
85
|
+
::Jekyll.logger.info "Downloading #{dest}"
|
|
86
|
+
|
|
87
|
+
FileUtils.mkdir_p File.dirname(full)
|
|
88
|
+
|
|
89
|
+
File.open(full, 'w') do |f|
|
|
90
|
+
if progress
|
|
91
|
+
head = Faraday.head(url)
|
|
92
|
+
content_length = head.headers['content-length'].to_i
|
|
93
|
+
progress = ProgressBar.create(title: File.basename(dest), total: content_length, output: $stderr)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
Faraday.get(url) do |req|
|
|
97
|
+
req.options.on_data = Proc.new do |chunk, downloaded_bytes|
|
|
98
|
+
f.write chunk
|
|
99
|
+
|
|
100
|
+
if progress
|
|
101
|
+
progress.progress = (downloaded_bytes > content_length) ? content_length : downloaded_bytes
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
dest
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# List post types
|
|
111
|
+
#
|
|
112
|
+
# @return [Array]
|
|
113
|
+
def layouts
|
|
114
|
+
@layouts ||= wp["select distinct post_type from #{prefix}posts"].to_a.map(&:values).flatten
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# Finds all posts optionally filtering by post type. This is not
|
|
118
|
+
# the official Sequel syntax, but it retrieves metadata as objects
|
|
119
|
+
# with a single query (and a sub-query).
|
|
120
|
+
#
|
|
121
|
+
# @param :layout [String] Layout name, one of #layouts
|
|
122
|
+
# @param :with_meta [Boolean] Toggle metadata pulling and conversion
|
|
123
|
+
# @return [Enumerator]
|
|
124
|
+
def posts(**options)
|
|
125
|
+
unless options[:layout].blank? || layouts.include?(options[:layout])
|
|
126
|
+
raise ArgumentError, "#{options[:layout]} must be one of #{layouts.join(', ')}"
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
wp[post_query(**options)].each_page(limit).to_a.map(&:to_a).flatten.tap do |p|
|
|
130
|
+
p.map do |post|
|
|
131
|
+
# Sequel parses dates in local time
|
|
132
|
+
post[:date] = ::Jekyll::Utils.parse_date(post[:date]) unless post[:date].blank?
|
|
133
|
+
post[:last_modified_at] = ::Jekyll::Utils.parse_date(post[:last_modified_at]) unless post[:last_modified_at].blank?
|
|
134
|
+
|
|
135
|
+
post[:front_matter] = JSON.parse(post[:front_matter]).transform_keys(&:to_sym).transform_values { |v| v.size == 1 ? v.first : v } unless post[:front_matter].blank?
|
|
136
|
+
post[:terms] = JSON.parse(post[:terms]).transform_keys(&:to_sym) unless post[:terms].blank?
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
# Brings all users.
|
|
142
|
+
#
|
|
143
|
+
# @param :with_meta [Boolean] include metadata
|
|
144
|
+
# @return [Array]
|
|
145
|
+
def users(**options)
|
|
146
|
+
options[:with_meta] = true unless options.key? :with_meta
|
|
147
|
+
|
|
148
|
+
wp[user_query(**options)].each_page(limit).to_a.map(&:to_a).flatten.tap do |u|
|
|
149
|
+
next unless options[:with_meta]
|
|
150
|
+
|
|
151
|
+
u.map do |user|
|
|
152
|
+
user[:meta] = JSON.parse(user[:meta]).transform_keys(&:to_sym) unless user[:meta].blank?
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
private
|
|
158
|
+
|
|
159
|
+
# Finds all users. If it's a multisite WP, we need to check the
|
|
160
|
+
# main table.
|
|
161
|
+
#
|
|
162
|
+
# @param :with_meta [Boolean] include metadata
|
|
163
|
+
# @return [String]
|
|
164
|
+
def user_query(with_meta: true)
|
|
165
|
+
pfx = multisite&.prefix || prefix
|
|
166
|
+
|
|
167
|
+
<<~EOQ
|
|
168
|
+
select
|
|
169
|
+
u.*
|
|
170
|
+
#{", json_group_object(m.meta_key, m.meta_value) as meta" if with_meta}
|
|
171
|
+
from #{pfx}users as u
|
|
172
|
+
#{"left join #{pfx}usermeta as m on m.user_id = u.id" if with_meta}
|
|
173
|
+
group by u.id
|
|
174
|
+
EOQ
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# Query for posts, optionally bringing metadata as JSON objects.
|
|
178
|
+
#
|
|
179
|
+
# @param :layout [String] Layout name
|
|
180
|
+
# @param :with_meta [Boolean] Query metadata
|
|
181
|
+
# @return [String]
|
|
182
|
+
def post_query(layout: nil, with_meta: true)
|
|
183
|
+
<<~EOQ
|
|
184
|
+
select
|
|
185
|
+
p.ID as id,
|
|
186
|
+
strftime('%Y-%m-%d %H:%M:%S UTC', p.post_date_gmt) as date,
|
|
187
|
+
strftime('%Y-%m-%d %H:%M:%S UTC', p.post_modified_gmt) as last_modified_at,
|
|
188
|
+
p.post_author as author,
|
|
189
|
+
p.post_type as layout,
|
|
190
|
+
p.post_name as slug,
|
|
191
|
+
p.post_title as title,
|
|
192
|
+
p.post_content as content,
|
|
193
|
+
p.post_excerpt as excerpt,
|
|
194
|
+
p.post_status as status,
|
|
195
|
+
p.comment_status as comment_status,
|
|
196
|
+
p.ping_status as ping_status,
|
|
197
|
+
p.post_password as password,
|
|
198
|
+
p.to_ping as to_ping,
|
|
199
|
+
p.pinged as pinged,
|
|
200
|
+
p.post_content_filtered as content_filtered,
|
|
201
|
+
p.post_parent as parent,
|
|
202
|
+
p.guid as guid,
|
|
203
|
+
p.menu_order as menu_order,
|
|
204
|
+
p.post_mime_type as mime_type,
|
|
205
|
+
p.comment_count as comment_count
|
|
206
|
+
#{", f.front_matter as front_matter" if with_meta}
|
|
207
|
+
#{", t.terms as terms" if with_meta}
|
|
208
|
+
from #{prefix}posts as p
|
|
209
|
+
#{"left join (#{meta_query}) as f on f.post_id = p.ID" if with_meta}
|
|
210
|
+
#{"left join (#{terms_query(layout: layout)}) as t on t.id = p.ID" if with_meta}
|
|
211
|
+
#{"where p.post_type = '#{layout}'" if layout}
|
|
212
|
+
group by p.ID
|
|
213
|
+
EOQ
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
# Recover the post meta as a JSON object with multiple values
|
|
217
|
+
# converted to arrays
|
|
218
|
+
#
|
|
219
|
+
# @return [String]
|
|
220
|
+
def meta_query
|
|
221
|
+
<<~EOQ
|
|
222
|
+
select
|
|
223
|
+
post_id,
|
|
224
|
+
json_group_object(meta_key, json(meta_values)) as front_matter
|
|
225
|
+
from (
|
|
226
|
+
select
|
|
227
|
+
post_id,
|
|
228
|
+
meta_key,
|
|
229
|
+
json_group_array(meta_value) as meta_values
|
|
230
|
+
from #{prefix}postmeta
|
|
231
|
+
group by post_id, meta_key
|
|
232
|
+
)
|
|
233
|
+
group by post_id
|
|
234
|
+
EOQ
|
|
235
|
+
end
|
|
236
|
+
|
|
237
|
+
# Term taxonomy query
|
|
238
|
+
#
|
|
239
|
+
# @param :layout [String] Layout name
|
|
240
|
+
# @return [String]
|
|
241
|
+
def terms_query(layout: nil)
|
|
242
|
+
<<~EOQ
|
|
243
|
+
select
|
|
244
|
+
p.ID as id,
|
|
245
|
+
json_group_object(tt.taxonomy, t.name) as terms
|
|
246
|
+
from #{prefix}posts as p
|
|
247
|
+
left join #{prefix}term_relationships as r on r.object_id = p.ID
|
|
248
|
+
left join #{prefix}term_taxonomy as tt on tt.term_taxonomy_id = r.term_taxonomy_id
|
|
249
|
+
left join #{prefix}terms as t on t.term_id = tt.term_id
|
|
250
|
+
#{"where p.post_type = '#{layout}'" if layout}
|
|
251
|
+
group by p.ID
|
|
252
|
+
EOQ
|
|
253
|
+
end
|
|
254
|
+
end
|
|
255
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: sutty-migration
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- f
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-
|
|
11
|
+
date: 2021-06-10 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: jekyll
|
|
@@ -52,6 +52,76 @@ dependencies:
|
|
|
52
52
|
- - "~>"
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
54
|
version: '1.0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: faraday
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '1.4'
|
|
62
|
+
type: :runtime
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '1.4'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: progressbar
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - "~>"
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '1.11'
|
|
76
|
+
type: :runtime
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - "~>"
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '1.11'
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
84
|
+
name: sqlite3
|
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - "~>"
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: '1.4'
|
|
90
|
+
type: :runtime
|
|
91
|
+
prerelease: false
|
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - "~>"
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: '1.4'
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: sequel
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - "~>"
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: '5.45'
|
|
104
|
+
type: :runtime
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - "~>"
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: '5.45'
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
112
|
+
name: pry
|
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
|
114
|
+
requirements:
|
|
115
|
+
- - ">="
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: '0'
|
|
118
|
+
type: :development
|
|
119
|
+
prerelease: false
|
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
121
|
+
requirements:
|
|
122
|
+
- - ">="
|
|
123
|
+
- !ruby/object:Gem::Version
|
|
124
|
+
version: '0'
|
|
55
125
|
description: Takes datafiles and converts them into posts
|
|
56
126
|
email:
|
|
57
127
|
- f@sutty.nl
|
|
@@ -64,7 +134,10 @@ files:
|
|
|
64
134
|
- LICENSE.txt
|
|
65
135
|
- README.md
|
|
66
136
|
- lib/sutty-migration.rb
|
|
67
|
-
- lib/
|
|
137
|
+
- lib/sutty_migration/core_extensions.rb
|
|
138
|
+
- lib/sutty_migration/data.rb
|
|
139
|
+
- lib/sutty_migration/jekyll/document_creator.rb
|
|
140
|
+
- lib/sutty_migration/wordpress.rb
|
|
68
141
|
homepage: https://0xacab.org/sutty/jekyll/sutty-migration
|
|
69
142
|
licenses:
|
|
70
143
|
- GPL-3.0
|
data/lib/wordpress.rb
DELETED
|
@@ -1,174 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
# Debug
|
|
4
|
-
require 'pry'
|
|
5
|
-
# Generar UUIDs
|
|
6
|
-
require 'securerandom'
|
|
7
|
-
# Traer resultados de la base de datos
|
|
8
|
-
require 'sequel'
|
|
9
|
-
require 'sqlite3'
|
|
10
|
-
require 'json'
|
|
11
|
-
# Limpieza de contenido
|
|
12
|
-
require 'loofah'
|
|
13
|
-
require 'rails/html/scrubbers'
|
|
14
|
-
require 'rails/html/sanitizer'
|
|
15
|
-
require 'reverse_markdown'
|
|
16
|
-
# Descargar archivos
|
|
17
|
-
require 'faraday'
|
|
18
|
-
|
|
19
|
-
class Wordpress
|
|
20
|
-
attr_reader :site, :prefix, :limit, :url
|
|
21
|
-
|
|
22
|
-
def initialize(site:, url:, prefix: 'wp_', limit: 10)
|
|
23
|
-
@site = site
|
|
24
|
-
@prefix = prefix.freeze
|
|
25
|
-
@limit = limit.freeze
|
|
26
|
-
@url = url.freeze
|
|
27
|
-
|
|
28
|
-
# Conectarse a la base de datos
|
|
29
|
-
@wp = Sequel.sqlite(File.join(site.source, '_data', 'wordpress', 'post.sqlite3'))
|
|
30
|
-
# Las funciones de JSON usan mucha CPU, vamos a traer de a pocos
|
|
31
|
-
# registros.
|
|
32
|
-
@wp.extension :pagination
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def download(file)
|
|
36
|
-
dest = 'wp-content/uploads/' + file
|
|
37
|
-
full = File.join(site.source, dest)
|
|
38
|
-
|
|
39
|
-
return dest if File.exist? full
|
|
40
|
-
|
|
41
|
-
Jekyll.logger.info "Downloading #{dest}"
|
|
42
|
-
|
|
43
|
-
FileUtils.mkdir_p File.dirname(full)
|
|
44
|
-
|
|
45
|
-
File.open(full, 'w') do |f|
|
|
46
|
-
Faraday.get(url + '/' + dest) do |req|
|
|
47
|
-
req.options.on_data = Proc.new do |chunk, _|
|
|
48
|
-
f.write chunk
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
dest
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
# Obtiene todos los posts opcionalmente filtrando por tipo de post.
|
|
57
|
-
# No es la forma oficial de Sequel pero no tenemos tiempo de
|
|
58
|
-
# aprenderla específicamente y además tenemos las opciones en formato
|
|
59
|
-
# JSON que no estarían soportadas.
|
|
60
|
-
def posts(layout: nil)
|
|
61
|
-
query = post_query.dup
|
|
62
|
-
query += " where post_type = '#{layout}'" if layout
|
|
63
|
-
query += ' group by posts.ID'
|
|
64
|
-
|
|
65
|
-
@wp[query].each_page(limit)
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
def meta(id:)
|
|
69
|
-
@wp[meta_query(id: id)].to_a
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
private
|
|
73
|
-
|
|
74
|
-
# Obtener todos los posts, json_objectagg requiere mariadb 10.5
|
|
75
|
-
def post_query
|
|
76
|
-
@post_query ||= <<~EOQ
|
|
77
|
-
select ID as id,
|
|
78
|
-
post_title as title,
|
|
79
|
-
post_name as slug,
|
|
80
|
-
post_type as layout,
|
|
81
|
-
strftime('%Y-%m-%d', post_date) as date,
|
|
82
|
-
post_status as status,
|
|
83
|
-
post_content as content,
|
|
84
|
-
json_group_object(meta_key, meta_value) as data
|
|
85
|
-
from #{prefix}posts as posts
|
|
86
|
-
left join #{prefix}postmeta as frontmatter
|
|
87
|
-
on posts.ID = frontmatter.post_id
|
|
88
|
-
EOQ
|
|
89
|
-
end
|
|
90
|
-
|
|
91
|
-
def meta_query(id:)
|
|
92
|
-
<<~EOQ
|
|
93
|
-
SELECT
|
|
94
|
-
terms.name AS `name`,
|
|
95
|
-
ttax.taxonomy AS `type`,
|
|
96
|
-
ttax.parent AS `parent`,
|
|
97
|
-
ttax.term_id AS `id`
|
|
98
|
-
FROM
|
|
99
|
-
#{prefix}terms AS `terms`,
|
|
100
|
-
#{prefix}term_relationships AS `trels`,
|
|
101
|
-
#{prefix}term_taxonomy AS `ttax`
|
|
102
|
-
WHERE
|
|
103
|
-
trels.object_id = '#{id}' AND
|
|
104
|
-
trels.term_taxonomy_id = ttax.term_taxonomy_id AND
|
|
105
|
-
terms.term_id = ttax.term_id
|
|
106
|
-
EOQ
|
|
107
|
-
end
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
# Antes de generar el sitio vamos a leer todos los artículos desde la
|
|
111
|
-
# base de datos y generarlos localmente.
|
|
112
|
-
Jekyll::Hooks.register :site, :post_read do |site|
|
|
113
|
-
wp = Wordpress.new(site: site,
|
|
114
|
-
url: site.config.dig('wordpress', 'url'),
|
|
115
|
-
prefix: site.config.dig('wordpress', 'prefix'))
|
|
116
|
-
|
|
117
|
-
collection = site.collections['posts']
|
|
118
|
-
ascii_re = Regexp.new("\P{ASCII}").freeze
|
|
119
|
-
sanitizer = Rails::Html::SafeListSanitizer.new
|
|
120
|
-
|
|
121
|
-
# Traer todas las imágenes cargadas y descargarlas
|
|
122
|
-
attachments = wp.posts(layout: 'attachment').map do |page|
|
|
123
|
-
page.map do |attachment|
|
|
124
|
-
attachment[:data] = JSON.parse(attachment[:data]) unless attachment[:data].nil?
|
|
125
|
-
file = attachment.dig(:data, '_wp_attached_file')
|
|
126
|
-
|
|
127
|
-
next unless file
|
|
128
|
-
|
|
129
|
-
dest = wp.download(file)
|
|
130
|
-
|
|
131
|
-
# Tener un mapa de IDs y archivos destino
|
|
132
|
-
[ attachment[:id], dest ]
|
|
133
|
-
end
|
|
134
|
-
end.compact.flatten(1).to_h
|
|
135
|
-
|
|
136
|
-
%w[post page].each do |type|
|
|
137
|
-
wp.posts(layout: type).each do |page|
|
|
138
|
-
page.each do |post|
|
|
139
|
-
# Convertir los datos extra en un Hash
|
|
140
|
-
post[:data] = JSON.parse(post[:data]) unless post[:data].nil?
|
|
141
|
-
post[:slug] = Jekyll::Utils.slugify(post[:title], mode: 'latin') if post[:slug].empty?
|
|
142
|
-
post[:meta] = wp.meta id: post[:id]
|
|
143
|
-
|
|
144
|
-
path = File.join(site.source, '_posts', post.slice(:date, :slug).values.join('-') + '.markdown')
|
|
145
|
-
|
|
146
|
-
if File.exist? path
|
|
147
|
-
Jekyll.logger.info "#{path} ya fue migrado, actualizando"
|
|
148
|
-
|
|
149
|
-
doc = site.documents.find do |d|
|
|
150
|
-
d['id'] == post[:id]
|
|
151
|
-
end
|
|
152
|
-
else
|
|
153
|
-
# Crear un post nuevo y agregarlo a la colección
|
|
154
|
-
collection.docs << doc = Jekyll::Document.new(path, site: site, collection: collection)
|
|
155
|
-
doc.data['uuid'] = SecureRandom.uuid
|
|
156
|
-
end
|
|
157
|
-
|
|
158
|
-
thumbnail = post.dig(:data, '_thumbnail_id')&.to_i
|
|
159
|
-
|
|
160
|
-
doc.data['layout'] = type
|
|
161
|
-
doc.data['title'] = post[:title]
|
|
162
|
-
doc.data['draft'] = post[:status] != 'publish'
|
|
163
|
-
doc.data['id'] = post[:id]
|
|
164
|
-
doc.data['date'] = Jekyll::Utils.parse_date(post[:date])
|
|
165
|
-
doc.data['tags'] = post[:meta].select { |k| k[:type] == 'post_tag' }.map { |k| k[:name] }
|
|
166
|
-
doc.data['categories'] = post[:meta].select { |k| k[:type] == 'category' }.map { |k| k[:name] }
|
|
167
|
-
doc.data['image'] = attachments[thumbnail] if thumbnail
|
|
168
|
-
|
|
169
|
-
doc.content = ReverseMarkdown.convert(sanitizer.sanitize(post[:content]))
|
|
170
|
-
doc.save
|
|
171
|
-
end
|
|
172
|
-
end
|
|
173
|
-
end
|
|
174
|
-
end
|