sutty-migration 0.1.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +48 -0
- data/lib/sutty-migration.rb +1 -67
- data/lib/sutty_migration/core_extensions.rb +26 -0
- data/lib/sutty_migration/data.rb +77 -0
- data/lib/sutty_migration/jekyll/document_creator.rb +41 -0
- data/lib/sutty_migration/wordpress.rb +255 -0
- metadata +76 -3
- data/lib/wordpress.rb +0 -174
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f5a175d5ab6dd98eb4bed7ac987961ce9aaae15355c3eeb8e191805a6192ae34
|
4
|
+
data.tar.gz: f677b5f7945ba06403239197798d1b37eaea975c2057734c16484a0c75204f48
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9cb1bf01c37e40036dffdd19b14b78fbdf538af45625867eb6cbd45ab66bcb468a91b11a13ab86ca83e2d8692235ed0c6d4d919186d3a8713f9c81723c7978a6
|
7
|
+
data.tar.gz: a3fc41c535e1a028526beb8904be1a72b2eee49473e4036059e7b0d4bbeff9b8670bf20b564636709605200aeede0c928bf9c9613146339a01803d396c570c05
|
data/README.md
CHANGED
@@ -66,6 +66,54 @@ To start migration just build your site:
|
|
66
66
|
bundle exec jekyll build
|
67
67
|
```
|
68
68
|
|
69
|
+
**Tip:** Files can also be JSON, TSV and YAML, since they're all
|
70
|
+
supported by Jekyll.
|
71
|
+
|
72
|
+
### Wordpress
|
73
|
+
|
74
|
+
Instead of requiring you to install and configure MariaDB/MySQL, you can
|
75
|
+
convert the database into SQLite3 like this:
|
76
|
+
|
77
|
+
```bash
|
78
|
+
git clone https://0xacab.org/sutty/mysql2sqlite.git
|
79
|
+
cd mysql2sqlite
|
80
|
+
./mysql2sqlite /path/to/database/dump.sql |
|
81
|
+
sed -re "s/, 0x([0-9a-f]+),/, X'\1',/i" |
|
82
|
+
sqlite3 wordpress.sqlite3
|
83
|
+
```
|
84
|
+
|
85
|
+
It will probably show some errors.
|
86
|
+
|
87
|
+
Note the `sed` command is required to convert hexadecimal values into
|
88
|
+
SQLite syntax, since `mysql2sqlite` doesn't support this yet.
|
89
|
+
|
90
|
+
Wordpress websites can include lots of posts and metadata, depending on
|
91
|
+
the number of plugins installed. We don't have an official way of
|
92
|
+
dumping everything into Jekyll, because you will probably want to move
|
93
|
+
things around. You can write a plugin like this:
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
# _plugins/wordpress.rb
|
97
|
+
# frozen_string_literal: true
|
98
|
+
|
99
|
+
require 'sutty_migration/wordpress'
|
100
|
+
require 'sutty_migration/jekyll/document_creator'
|
101
|
+
require 'jekyll-write-and-commit-changes'
|
102
|
+
|
103
|
+
Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
|
104
|
+
wp = SuttyMigration::Wordpress.new(site: site, database: 'wordpress.sqlite3', prefix: 'wp_', url: 'https://wordpre.ss')
|
105
|
+
|
106
|
+
# Download all files
|
107
|
+
wp.download_all
|
108
|
+
|
109
|
+
wp.posts(layout: 'post').each do |post|
|
110
|
+
doc = Jekyll::Document.create(site: site, title: post[:title], date: post[:date], collection: 'posts')
|
111
|
+
doc.content = post[:content]
|
112
|
+
doc.save
|
113
|
+
end
|
114
|
+
end
|
115
|
+
```
|
116
|
+
|
69
117
|
## Contributing
|
70
118
|
|
71
119
|
Bug reports and pull requests are welcome on 0xacab.org at
|
data/lib/sutty-migration.rb
CHANGED
@@ -1,69 +1,3 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
require 'fast_blank'
|
5
|
-
require 'jekyll-write-and-commit-changes'
|
6
|
-
|
7
|
-
Jekyll::Hooks.register :site, :post_read do |site|
|
8
|
-
documents = site.documents
|
9
|
-
|
10
|
-
site.data['layouts']&.each do |name, layout|
|
11
|
-
site.data.dig('migration', name)&.each do |row|
|
12
|
-
row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
|
13
|
-
|
14
|
-
document = documents.find do |doc|
|
15
|
-
doc.data['id'] == row['id']
|
16
|
-
end
|
17
|
-
|
18
|
-
document ||=
|
19
|
-
begin
|
20
|
-
base = "#{row['date'] || Date.today.to_s}-#{Jekyll::Utils.slugify(row['title'], mode: 'latin')}.markdown"
|
21
|
-
path = File.join(site.source, '_posts', base)
|
22
|
-
|
23
|
-
raise ArgumentError, "Row #{row['id']} duplicates file #{base}" if File.exist? path
|
24
|
-
|
25
|
-
doc = Jekyll::Document.new(path, site: site, collection: site.collections['posts'])
|
26
|
-
site.collections['posts'] << doc
|
27
|
-
|
28
|
-
doc
|
29
|
-
end
|
30
|
-
|
31
|
-
row.each do |attribute, value|
|
32
|
-
row[attribute] =
|
33
|
-
case layout.dig(attribute, 'type')
|
34
|
-
when 'string' then value
|
35
|
-
when 'text' then value
|
36
|
-
when 'tel' then value
|
37
|
-
when 'color' then value # TODO: validar
|
38
|
-
when 'date' then Jekyll::Utils.parse_date(value)
|
39
|
-
when 'email' then value # TODO: validar
|
40
|
-
when 'url' then value # TODO: validar
|
41
|
-
when 'content' then value
|
42
|
-
when 'markdown_content' then value
|
43
|
-
when 'markdown' then value
|
44
|
-
when 'number' then value.to_i
|
45
|
-
when 'order' then value.to_i
|
46
|
-
when 'boolean' then !value.strip.empty?
|
47
|
-
when 'array' then value.split(',').map(&:strip)
|
48
|
-
# TODO: procesar los valores en base a los valores predefinidos
|
49
|
-
when 'predefined_array' then value.split(',').map(&:strip)
|
50
|
-
when 'image' then { 'path' => value, 'description' => '' }
|
51
|
-
when 'file' then { 'path' => value, 'description' => '' }
|
52
|
-
when 'geo' then %w[lat lng].zip(value.split(',', 2).map(&:to_f)).to_h
|
53
|
-
when 'belongs_to' then value
|
54
|
-
when 'has_many' then value.split(',').map(&:strip)
|
55
|
-
when 'has_and_belongs_to_many' then value.split(',').map(&:strip)
|
56
|
-
when 'related_posts' then value.split(',').map(&:strip)
|
57
|
-
when 'locales' then value.split(',').map(&:strip)
|
58
|
-
else value
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
document.data['uuid'] ||= SecureRandom.uuid
|
63
|
-
document.content = row.delete('content')
|
64
|
-
|
65
|
-
document.data.merge! row
|
66
|
-
document.save
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
3
|
+
require_relative 'sutty_migration/data'
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Extend String so we can check whether it is blank
|
4
|
+
require 'fast_blank'
|
5
|
+
|
6
|
+
# Treat nil values as blank
|
7
|
+
class NilClass
|
8
|
+
def blank?
|
9
|
+
true
|
10
|
+
end
|
11
|
+
|
12
|
+
def present?
|
13
|
+
false
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
# A Time value is never blank
|
18
|
+
class Time
|
19
|
+
def blank?
|
20
|
+
false
|
21
|
+
end
|
22
|
+
|
23
|
+
def present?
|
24
|
+
true
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'securerandom'
|
4
|
+
require_relative 'core_extensions'
|
5
|
+
require_relative 'jekyll/document_creator'
|
6
|
+
|
7
|
+
# Registers a plugin for converting CSV files into posts following
|
8
|
+
# Sutty's layout definition.
|
9
|
+
#
|
10
|
+
# If jekyll-write-and-commit-changes is enabled, documents will be saved
|
11
|
+
# on disk and committed if the build command is run with
|
12
|
+
# JEKYLL_ENV=production
|
13
|
+
Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
|
14
|
+
documents = site.documents
|
15
|
+
|
16
|
+
site.data['layouts']&.each do |name, layout|
|
17
|
+
site.data.dig('migration', name)&.each do |row|
|
18
|
+
row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
|
19
|
+
row['date'] ||= Time.now
|
20
|
+
|
21
|
+
unless row['id'].blank?
|
22
|
+
document = documents.find do |doc|
|
23
|
+
doc.data['id'] == row['id']
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
document ||= Jekyll::Document.create(site: site, collection: 'posts', **row.slice(*%w[date slug title]).transform_keys(&:to_sym))
|
28
|
+
|
29
|
+
row.each do |attribute, value|
|
30
|
+
next unless value.blank?
|
31
|
+
|
32
|
+
row[attribute] =
|
33
|
+
case layout.dig(attribute, 'type')
|
34
|
+
when 'string' then value
|
35
|
+
when 'text' then value
|
36
|
+
when 'tel' then value
|
37
|
+
# TODO: validate
|
38
|
+
when 'color' then value
|
39
|
+
when 'date' then Jekyll::Utils.parse_date(value)
|
40
|
+
# TODO: validate
|
41
|
+
when 'email' then value
|
42
|
+
# TODO: validate
|
43
|
+
when 'url' then value
|
44
|
+
when 'content' then value
|
45
|
+
when 'markdown_content' then value
|
46
|
+
when 'markdown' then value
|
47
|
+
when 'number' then value.to_i
|
48
|
+
when 'order' then value.to_i
|
49
|
+
when 'boolean' then !value.strip.empty?
|
50
|
+
when 'array' then value.split(',').map(&:strip)
|
51
|
+
# TODO: process values from the default array
|
52
|
+
when 'predefined_array' then value.split(',').map(&:strip)
|
53
|
+
when 'image' then { 'path' => value, 'description' => '' }
|
54
|
+
when 'file' then { 'path' => value, 'description' => '' }
|
55
|
+
when 'geo' then %w[lat lng].zip(value.split(',', 2).map(&:to_f)).to_h
|
56
|
+
when 'belongs_to' then value
|
57
|
+
when 'has_many' then value.split(',').map(&:strip)
|
58
|
+
when 'has_and_belongs_to_many' then value.split(',').map(&:strip)
|
59
|
+
when 'related_posts' then value.split(',').map(&:strip)
|
60
|
+
when 'locales' then value.split(',').map(&:strip)
|
61
|
+
else value
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
document.data['uuid'] ||= SecureRandom.uuid
|
66
|
+
document.content = row.delete('content')
|
67
|
+
|
68
|
+
document.data.merge! row
|
69
|
+
document.save if document.respond_to? :save
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
next unless site.respond_to?(:repository)
|
74
|
+
next unless ENV['JEKYLL_ENV'] == 'production'
|
75
|
+
|
76
|
+
site.repository.commit 'CSV Migration'
|
77
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'jekyll/utils'
|
4
|
+
require_relative '../core_extensions'
|
5
|
+
|
6
|
+
module SuttyMigration
|
7
|
+
module Jekyll
|
8
|
+
module DocumentCreator
|
9
|
+
class DocumentExists < ArgumentError; end
|
10
|
+
def self.included(base)
|
11
|
+
base.class_eval do
|
12
|
+
|
13
|
+
# Creates a new document in a collection or fails if it already
|
14
|
+
# exists.
|
15
|
+
#
|
16
|
+
# @param :site [Jekyll::Site] Jekyll site
|
17
|
+
# @param :date [Time] Post date
|
18
|
+
# @param :title [String] Post title
|
19
|
+
# @param :slug [String] Post slug, slugified title if empty
|
20
|
+
# @param :collection [Jekyll::Collection,String] Collection label or collection
|
21
|
+
# @return [Jekyll::Document] A new document
|
22
|
+
def self.create(site:, date:, title:, slug: nil, collection:)
|
23
|
+
collection = site.collections[collection] if collection.is_a? String
|
24
|
+
slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
|
25
|
+
basename = "#{date.strftime('%F')}-#{slug}.markdown"
|
26
|
+
path = File.join(collection.directory, basename)
|
27
|
+
|
28
|
+
raise DocumentExists, "#{path} already exists" if File.exist? path
|
29
|
+
|
30
|
+
::Jekyll::Document.new(path, site: site, collection: collection).tap do |document|
|
31
|
+
collection.docs << document
|
32
|
+
document.data['title'] = title
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
::Jekyll::Document.include SuttyMigration::Jekyll::DocumentCreator
|
@@ -0,0 +1,255 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'time'
|
4
|
+
require 'securerandom'
|
5
|
+
require 'sequel'
|
6
|
+
require 'sqlite3'
|
7
|
+
require 'json'
|
8
|
+
require 'faraday'
|
9
|
+
require 'progressbar'
|
10
|
+
require 'jekyll/utils'
|
11
|
+
|
12
|
+
module SuttyMigration
|
13
|
+
# Brings posts and attachments from a SQLite3 database. You can
|
14
|
+
# convert a MySQL/MariaDB dump by using `mysql2sqlite`.
|
15
|
+
#
|
16
|
+
# It doesn't convert them into Jekyll posts but allows you to write a
|
17
|
+
# migration plugin where you can convert data by yourself. We may add
|
18
|
+
# this feature in the future.
|
19
|
+
class Wordpress
|
20
|
+
attr_reader :site, :prefix, :limit, :url, :wp, :database, :multisite
|
21
|
+
|
22
|
+
# @param :site [Jekyll::Site] Jekyll site
|
23
|
+
# @param :url [String] Wordpress site URL (must be up for downloads)
|
24
|
+
# @param :database [String] Database path, by default `_data/wordpress.sqlite3`
|
25
|
+
# @param :prefix [String] WP table prefix
|
26
|
+
# @param :limit [Integer] Page length
|
27
|
+
# @param :multisite [Boolean] Site is multisite
|
28
|
+
def initialize(site:, url:, database: nil, prefix: 'wp_', limit: 10, multisite: nil)
|
29
|
+
@site = site
|
30
|
+
@prefix = prefix.freeze
|
31
|
+
@limit = limit.freeze
|
32
|
+
@url = url.freeze
|
33
|
+
@database = database || File.join(site.source, '_data', 'wordpress.sqlite3')
|
34
|
+
@multisite = multisite
|
35
|
+
end
|
36
|
+
|
37
|
+
# Generate database connections for a multisite WP
|
38
|
+
#
|
39
|
+
# @return [Hash] { blog_id => blog row merged with db: SuttyMigration::Wordpress }
|
40
|
+
def blogs
|
41
|
+
@blogs ||= wp["select * from #{prefix}blogs"].to_a.map do |blog|
|
42
|
+
url = "https://#{blog[:domain]}#{blog[:path]}"
|
43
|
+
pfx = "#{prefix}#{blog[:blog_id]}_" if blog[:blog_id] > 1
|
44
|
+
pfx ||= prefix
|
45
|
+
|
46
|
+
[ blog[:blog_id], blog.merge(db: self.class.new(site: site, url: url, prefix: pfx, database: database, limit: limit, multisite: self)) ]
|
47
|
+
end.to_h
|
48
|
+
end
|
49
|
+
|
50
|
+
def options
|
51
|
+
@options ||= wp["select option_name, option_value from #{prefix}options"].to_a.map(&:values).to_h.transform_keys(&:to_sym)
|
52
|
+
end
|
53
|
+
|
54
|
+
# Open the database.
|
55
|
+
#
|
56
|
+
# @return [Sequel::SQLite::Database]
|
57
|
+
def wp
|
58
|
+
@wp ||= Sequel.sqlite(database).tap do |db|
|
59
|
+
db.extension :pagination
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
# Download all attachments. Adds the local path to them.
|
64
|
+
#
|
65
|
+
# @param :progress [Boolean] Toggle progress bar
|
66
|
+
# @return [Nil]
|
67
|
+
def download_all(progress: true)
|
68
|
+
posts(layout: 'attachment').each do |attachment|
|
69
|
+
attachment[:front_matter]['file_path'] = download(url: attachment[:guid], progress: progress)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
# Downloads a file if needed, optionally showing a progress bar.
|
74
|
+
#
|
75
|
+
# @param :url [String] File URL
|
76
|
+
# @param :progress [Boolean] Toggle progress bar
|
77
|
+
# @return [String] File local path
|
78
|
+
def download(url:, progress: true)
|
79
|
+
uri = URI(url)
|
80
|
+
dest = uri.path.sub(%r{\A/}, '')
|
81
|
+
full = File.join(site.source, dest)
|
82
|
+
|
83
|
+
return dest if File.exist? full
|
84
|
+
|
85
|
+
::Jekyll.logger.info "Downloading #{dest}"
|
86
|
+
|
87
|
+
FileUtils.mkdir_p File.dirname(full)
|
88
|
+
|
89
|
+
File.open(full, 'w') do |f|
|
90
|
+
if progress
|
91
|
+
head = Faraday.head(url)
|
92
|
+
content_length = head.headers['content-length'].to_i
|
93
|
+
progress = ProgressBar.create(title: File.basename(dest), total: content_length, output: $stderr)
|
94
|
+
end
|
95
|
+
|
96
|
+
Faraday.get(url) do |req|
|
97
|
+
req.options.on_data = Proc.new do |chunk, downloaded_bytes|
|
98
|
+
f.write chunk
|
99
|
+
|
100
|
+
if progress
|
101
|
+
progress.progress = (downloaded_bytes > content_length) ? content_length : downloaded_bytes
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
dest
|
108
|
+
end
|
109
|
+
|
110
|
+
# List post types
|
111
|
+
#
|
112
|
+
# @return [Array]
|
113
|
+
def layouts
|
114
|
+
@layouts ||= wp["select distinct post_type from #{prefix}posts"].to_a.map(&:values).flatten
|
115
|
+
end
|
116
|
+
|
117
|
+
# Finds all posts optionally filtering by post type. This is not
|
118
|
+
# the official Sequel syntax, but it retrieves metadata as objects
|
119
|
+
# with a single query (and a sub-query).
|
120
|
+
#
|
121
|
+
# @param :layout [String] Layout name, one of #layouts
|
122
|
+
# @param :with_meta [Boolean] Toggle metadata pulling and conversion
|
123
|
+
# @return [Array]
|
124
|
+
def posts(**options)
|
125
|
+
unless options[:layout].blank? || layouts.include?(options[:layout])
|
126
|
+
raise ArgumentError, "#{options[:layout]} must be one of #{layouts.join(', ')}"
|
127
|
+
end
|
128
|
+
|
129
|
+
wp[post_query(**options)].each_page(limit).to_a.map(&:to_a).flatten.tap do |p|
|
130
|
+
p.map do |post|
|
131
|
+
# Sequel parses dates on localtime
|
132
|
+
post[:date] = ::Jekyll::Utils.parse_date(post[:date]) unless post[:date].blank?
|
133
|
+
post[:last_modified_at] = ::Jekyll::Utils.parse_date(post[:last_modified_at]) unless post[:last_modified_at].blank?
|
134
|
+
|
135
|
+
post[:front_matter] = JSON.parse(post[:front_matter]).transform_keys(&:to_sym).transform_values { |v| v.size == 1 ? v.first : v } unless post[:front_matter].blank?
|
136
|
+
post[:terms] = JSON.parse(post[:terms]).transform_keys(&:to_sym) unless post[:terms].blank?
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
# Brings all users.
|
142
|
+
#
|
143
|
+
# @param :with_meta [Boolean] include metadata
|
144
|
+
# @return [Array]
|
145
|
+
def users(**options)
|
146
|
+
options[:with_meta] = true unless options.key? :with_meta
|
147
|
+
|
148
|
+
wp[user_query(**options)].each_page(limit).to_a.map(&:to_a).flatten.tap do |u|
|
149
|
+
next unless options[:with_meta]
|
150
|
+
|
151
|
+
u.map do |user|
|
152
|
+
user[:meta] = JSON.parse(user[:meta]).transform_keys(&:to_sym) unless user[:meta].blank?
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
private
|
158
|
+
|
159
|
+
# Finds all users. If it's a multisite WP, we need to check the
|
160
|
+
# main table.
|
161
|
+
#
|
162
|
+
# @param :with_meta [Boolean] include metadata
|
163
|
+
# @return [String]
|
164
|
+
def user_query(with_meta: true)
|
165
|
+
pfx = multisite&.prefix || prefix
|
166
|
+
|
167
|
+
<<~EOQ
|
168
|
+
select
|
169
|
+
u.*
|
170
|
+
#{", json_group_object(m.meta_key, m.meta_value) as meta" if with_meta}
|
171
|
+
from #{pfx}users as u
|
172
|
+
#{"left join #{pfx}usermeta as m on m.user_id = u.id" if with_meta}
|
173
|
+
group by u.id
|
174
|
+
EOQ
|
175
|
+
end
|
176
|
+
|
177
|
+
# Query for posts, optionally bringing metadata as JSON objects.
|
178
|
+
#
|
179
|
+
# @param :layout [String] Layout name
|
180
|
+
# @param :with_meta [Boolean] Query metadata
|
181
|
+
# @return [String]
|
182
|
+
def post_query(layout: nil, with_meta: true)
|
183
|
+
<<~EOQ
|
184
|
+
select
|
185
|
+
p.ID as id,
|
186
|
+
strftime('%Y-%m-%d %H:%M:%S UTC', p.post_date_gmt) as date,
|
187
|
+
strftime('%Y-%m-%d %H:%M:%S UTC', p.post_modified_gmt) as last_modified_at,
|
188
|
+
p.post_author as author,
|
189
|
+
p.post_type as layout,
|
190
|
+
p.post_name as slug,
|
191
|
+
p.post_title as title,
|
192
|
+
p.post_content as content,
|
193
|
+
p.post_excerpt as excerpt,
|
194
|
+
p.post_status as status,
|
195
|
+
p.comment_status as comment_status,
|
196
|
+
p.ping_status as ping_status,
|
197
|
+
p.post_password as password,
|
198
|
+
p.to_ping as to_ping,
|
199
|
+
p.pinged as pinged,
|
200
|
+
p.post_content_filtered as content_filtered,
|
201
|
+
p.post_parent as parent,
|
202
|
+
p.guid as guid,
|
203
|
+
p.menu_order as menu_order,
|
204
|
+
p.post_mime_type as mime_type,
|
205
|
+
p.comment_count as comment_count
|
206
|
+
#{", f.front_matter as front_matter" if with_meta}
|
207
|
+
#{", t.terms as terms" if with_meta}
|
208
|
+
from #{prefix}posts as p
|
209
|
+
#{"left join (#{meta_query}) as f on f.post_id = p.ID" if with_meta}
|
210
|
+
#{"left join (#{terms_query(layout: layout)}) as t on t.id = p.ID" if with_meta}
|
211
|
+
#{"where p.post_type = '#{layout}'" if layout}
|
212
|
+
group by p.ID
|
213
|
+
EOQ
|
214
|
+
end
|
215
|
+
|
216
|
+
# Recover the post meta as a JSON object with multiple values
|
217
|
+
# converted to arrays
|
218
|
+
#
|
219
|
+
# @return [String]
|
220
|
+
def meta_query
|
221
|
+
<<~EOQ
|
222
|
+
select
|
223
|
+
post_id,
|
224
|
+
json_group_object(meta_key, json(meta_values)) as front_matter
|
225
|
+
from (
|
226
|
+
select
|
227
|
+
post_id,
|
228
|
+
meta_key,
|
229
|
+
json_group_array(meta_value) as meta_values
|
230
|
+
from #{prefix}postmeta
|
231
|
+
group by post_id, meta_key
|
232
|
+
)
|
233
|
+
group by post_id
|
234
|
+
EOQ
|
235
|
+
end
|
236
|
+
|
237
|
+
# Term taxonomy query
|
238
|
+
#
|
239
|
+
# @param :layout [String] Layout name
|
240
|
+
# @return [String]
|
241
|
+
def terms_query(layout: nil)
|
242
|
+
<<~EOQ
|
243
|
+
select
|
244
|
+
p.ID as id,
|
245
|
+
json_group_object(tt.taxonomy, t.name) as terms
|
246
|
+
from #{prefix}posts as p
|
247
|
+
left join #{prefix}term_relationships as r on r.object_id = p.ID
|
248
|
+
left join #{prefix}term_taxonomy as tt on tt.term_taxonomy_id = r.term_taxonomy_id
|
249
|
+
left join #{prefix}terms as t on t.term_id = tt.term_id
|
250
|
+
#{"where p.post_type = '#{layout}'" if layout}
|
251
|
+
group by p.ID
|
252
|
+
EOQ
|
253
|
+
end
|
254
|
+
end
|
255
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sutty-migration
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- f
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-06-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jekyll
|
@@ -52,6 +52,76 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '1.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: faraday
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.4'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.4'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: progressbar
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.11'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.11'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: sqlite3
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.4'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.4'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: sequel
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '5.45'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '5.45'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: pry
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
55
125
|
description: Takes datafiles and converts them into posts
|
56
126
|
email:
|
57
127
|
- f@sutty.nl
|
@@ -64,7 +134,10 @@ files:
|
|
64
134
|
- LICENSE.txt
|
65
135
|
- README.md
|
66
136
|
- lib/sutty-migration.rb
|
67
|
-
- lib/
|
137
|
+
- lib/sutty_migration/core_extensions.rb
|
138
|
+
- lib/sutty_migration/data.rb
|
139
|
+
- lib/sutty_migration/jekyll/document_creator.rb
|
140
|
+
- lib/sutty_migration/wordpress.rb
|
68
141
|
homepage: https://0xacab.org/sutty/jekyll/sutty-migration
|
69
142
|
licenses:
|
70
143
|
- GPL-3.0
|
data/lib/wordpress.rb
DELETED
@@ -1,174 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# Debug
|
4
|
-
require 'pry'
|
5
|
-
# Generar UUIDs
|
6
|
-
require 'securerandom'
|
7
|
-
# Traer resultados de la base de datos
|
8
|
-
require 'sequel'
|
9
|
-
require 'sqlite3'
|
10
|
-
require 'json'
|
11
|
-
# Limpieza de contenido
|
12
|
-
require 'loofah'
|
13
|
-
require 'rails/html/scrubbers'
|
14
|
-
require 'rails/html/sanitizer'
|
15
|
-
require 'reverse_markdown'
|
16
|
-
# Descargar archivos
|
17
|
-
require 'faraday'
|
18
|
-
|
19
|
-
class Wordpress
|
20
|
-
attr_reader :site, :prefix, :limit, :url
|
21
|
-
|
22
|
-
def initialize(site:, url:, prefix: 'wp_', limit: 10)
|
23
|
-
@site = site
|
24
|
-
@prefix = prefix.freeze
|
25
|
-
@limit = limit.freeze
|
26
|
-
@url = url.freeze
|
27
|
-
|
28
|
-
# Conectarse a la base de datos
|
29
|
-
@wp = Sequel.sqlite(File.join(site.source, '_data', 'wordpress', 'post.sqlite3'))
|
30
|
-
# Las funciones de JSON usan mucha CPU, vamos a traer de a pocos
|
31
|
-
# registros.
|
32
|
-
@wp.extension :pagination
|
33
|
-
end
|
34
|
-
|
35
|
-
def download(file)
|
36
|
-
dest = 'wp-content/uploads/' + file
|
37
|
-
full = File.join(site.source, dest)
|
38
|
-
|
39
|
-
return dest if File.exist? full
|
40
|
-
|
41
|
-
Jekyll.logger.info "Downloading #{dest}"
|
42
|
-
|
43
|
-
FileUtils.mkdir_p File.dirname(full)
|
44
|
-
|
45
|
-
File.open(full, 'w') do |f|
|
46
|
-
Faraday.get(url + '/' + dest) do |req|
|
47
|
-
req.options.on_data = Proc.new do |chunk, _|
|
48
|
-
f.write chunk
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
dest
|
54
|
-
end
|
55
|
-
|
56
|
-
# Obtiene todos los posts opcionalmente filtrando por tipo de post.
|
57
|
-
# No es la forma oficial de Sequel pero no tenemos tiempo de
|
58
|
-
# aprenderla específicamente y además tenemos las opciones en formato
|
59
|
-
# JSON que no estarían soportadas.
|
60
|
-
def posts(layout: nil)
|
61
|
-
query = post_query.dup
|
62
|
-
query += " where post_type = '#{layout}'" if layout
|
63
|
-
query += ' group by posts.ID'
|
64
|
-
|
65
|
-
@wp[query].each_page(limit)
|
66
|
-
end
|
67
|
-
|
68
|
-
def meta(id:)
|
69
|
-
@wp[meta_query(id: id)].to_a
|
70
|
-
end
|
71
|
-
|
72
|
-
private
|
73
|
-
|
74
|
-
# Obtener todos los posts, json_objectagg requiere mariadb 10.5
|
75
|
-
def post_query
|
76
|
-
@post_query ||= <<~EOQ
|
77
|
-
select ID as id,
|
78
|
-
post_title as title,
|
79
|
-
post_name as slug,
|
80
|
-
post_type as layout,
|
81
|
-
strftime('%Y-%m-%d', post_date) as date,
|
82
|
-
post_status as status,
|
83
|
-
post_content as content,
|
84
|
-
json_group_object(meta_key, meta_value) as data
|
85
|
-
from #{prefix}posts as posts
|
86
|
-
left join #{prefix}postmeta as frontmatter
|
87
|
-
on posts.ID = frontmatter.post_id
|
88
|
-
EOQ
|
89
|
-
end
|
90
|
-
|
91
|
-
def meta_query(id:)
|
92
|
-
<<~EOQ
|
93
|
-
SELECT
|
94
|
-
terms.name AS `name`,
|
95
|
-
ttax.taxonomy AS `type`,
|
96
|
-
ttax.parent AS `parent`,
|
97
|
-
ttax.term_id AS `id`
|
98
|
-
FROM
|
99
|
-
#{prefix}terms AS `terms`,
|
100
|
-
#{prefix}term_relationships AS `trels`,
|
101
|
-
#{prefix}term_taxonomy AS `ttax`
|
102
|
-
WHERE
|
103
|
-
trels.object_id = '#{id}' AND
|
104
|
-
trels.term_taxonomy_id = ttax.term_taxonomy_id AND
|
105
|
-
terms.term_id = ttax.term_id
|
106
|
-
EOQ
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
# Antes de generar el sitio vamos a leer todos los artículos desde la
|
111
|
-
# base de datos y generarlos localmente.
|
112
|
-
Jekyll::Hooks.register :site, :post_read do |site|
|
113
|
-
wp = Wordpress.new(site: site,
|
114
|
-
url: site.config.dig('wordpress', 'url'),
|
115
|
-
prefix: site.config.dig('wordpress', 'prefix'))
|
116
|
-
|
117
|
-
collection = site.collections['posts']
|
118
|
-
ascii_re = Regexp.new("\P{ASCII}").freeze
|
119
|
-
sanitizer = Rails::Html::SafeListSanitizer.new
|
120
|
-
|
121
|
-
# Traer todas las imágenes cargadas y descargarlas
|
122
|
-
attachments = wp.posts(layout: 'attachment').map do |page|
|
123
|
-
page.map do |attachment|
|
124
|
-
attachment[:data] = JSON.parse(attachment[:data]) unless attachment[:data].nil?
|
125
|
-
file = attachment.dig(:data, '_wp_attached_file')
|
126
|
-
|
127
|
-
next unless file
|
128
|
-
|
129
|
-
dest = wp.download(file)
|
130
|
-
|
131
|
-
# Tener un mapa de IDs y archivos destino
|
132
|
-
[ attachment[:id], dest ]
|
133
|
-
end
|
134
|
-
end.compact.flatten(1).to_h
|
135
|
-
|
136
|
-
%w[post page].each do |type|
|
137
|
-
wp.posts(layout: type).each do |page|
|
138
|
-
page.each do |post|
|
139
|
-
# Convertir los datos extra en un Hash
|
140
|
-
post[:data] = JSON.parse(post[:data]) unless post[:data].nil?
|
141
|
-
post[:slug] = Jekyll::Utils.slugify(post[:title], mode: 'latin') if post[:slug].empty?
|
142
|
-
post[:meta] = wp.meta id: post[:id]
|
143
|
-
|
144
|
-
path = File.join(site.source, '_posts', post.slice(:date, :slug).values.join('-') + '.markdown')
|
145
|
-
|
146
|
-
if File.exist? path
|
147
|
-
Jekyll.logger.info "#{path} ya fue migrado, actualizando"
|
148
|
-
|
149
|
-
doc = site.documents.find do |d|
|
150
|
-
d['id'] == post[:id]
|
151
|
-
end
|
152
|
-
else
|
153
|
-
# Crear un post nuevo y agregarlo a la colección
|
154
|
-
collection.docs << doc = Jekyll::Document.new(path, site: site, collection: collection)
|
155
|
-
doc.data['uuid'] = SecureRandom.uuid
|
156
|
-
end
|
157
|
-
|
158
|
-
thumbnail = post.dig(:data, '_thumbnail_id')&.to_i
|
159
|
-
|
160
|
-
doc.data['layout'] = type
|
161
|
-
doc.data['title'] = post[:title]
|
162
|
-
doc.data['draft'] = post[:status] != 'publish'
|
163
|
-
doc.data['id'] = post[:id]
|
164
|
-
doc.data['date'] = Jekyll::Utils.parse_date(post[:date])
|
165
|
-
doc.data['tags'] = post[:meta].select { |k| k[:type] == 'post_tag' }.map { |k| k[:name] }
|
166
|
-
doc.data['categories'] = post[:meta].select { |k| k[:type] == 'category' }.map { |k| k[:name] }
|
167
|
-
doc.data['image'] = attachments[thumbnail] if thumbnail
|
168
|
-
|
169
|
-
doc.content = ReverseMarkdown.convert(sanitizer.sanitize(post[:content]))
|
170
|
-
doc.save
|
171
|
-
end
|
172
|
-
end
|
173
|
-
end
|
174
|
-
end
|