sutty-migration 0.1.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ffe46cc7d270c7f30d4a505704cb244e507df588bb380ad2c101c3a01675844b
4
- data.tar.gz: d63a9dd7fde09627c61f02f7b714e151f5122d5133e26d8c4d3be7917d35501e
3
+ metadata.gz: 5486653e0e1eb13f5c4c4f85235c875c782fee7be37a1bee9e4cdd84d5879d0a
4
+ data.tar.gz: '096ab9a992ad5b4cf36bb765a4eb99bd4ac9f2fc35ec25737906eb7c3abc8fdf'
5
5
  SHA512:
6
- metadata.gz: 9c9278d28ab6d4b862c5cc615941102c5ef8716c30b674093dd31f5a6dce1c337ff6729fab69e6018263a113b3b82f8faadc9b8aad2a9bc38cfd472d082d711c
7
- data.tar.gz: a016f1a5ff26e8c8c5c1c95d0393f6ebe497919b0301282d412440779ec44fbd0c06e7b7ba95a8d30169d3c936dd4eaf5d3f8e59e8fdd6c563504b6027125a02
6
+ metadata.gz: e94245fd5af90a7411b842e13c44a5f85bbbe2544449de98eaa9c52dbb70f095938754bb65dea382f5275df26b6753c37c5cea24c564ff8f98ad6a0f29406e0e
7
+ data.tar.gz: f415da3e9c4ebee1ec8a6101676ae17a319d05ee248c8360d834d7f321190e791eb8274eb6fa0c5fcc74be5c1d2e1b77195894cb9179c3e33626bd090636327b
data/README.md CHANGED
@@ -23,6 +23,7 @@ Add the plugin to your `_config.yml`:
23
23
  ```yaml
24
24
  plugins:
25
25
  - sutty-migration
26
+ array_separator: ','
26
27
  ```
27
28
 
28
29
  Compile a CSV file with the following required fields:
@@ -66,6 +67,103 @@ To start migration just build your site:
66
67
  bundle exec jekyll build
67
68
  ```
68
69
 
70
+ **Tip:** Files can also be JSON, TSV and YAML, since they're all
71
+ supported by Jekyll.
72
+
73
+ ### Wordpress
74
+
75
+ Instead of requiring you to install and configure MariaDB/MySQL, you can
76
+ convert the database into SQLite3 like this:
77
+
78
+ ```bash
79
+ git clone https://0xacab.org/sutty/mysql2sqlite.git
80
+ cd mysql2sqlite
81
+ ./mysql2sqlite /path/to/database/dump.sql |
82
+ sed -re "s/, 0x([0-9a-f]+),/, X'\1',/i" |
83
+ sqlite3 wordpress.sqlite3
84
+ ```
85
+
86
+ It will probably show some errors.
87
+
88
+ Note the `sed` command is required to convert hexadecimal values into
89
+ SQLite syntax, since `mysql2sqlite` doesn't support this yet.
90
+
91
+ Wordpress websites can include lots of posts and metadata, depending on
92
+ the amount of plugins installed. We don't have an official way of
93
+ dumping everything into Jekyll, because you will probably want to move
94
+ things around. You can write a plugin like this:
95
+
96
+ ```ruby
97
+ # _plugins/wordpress.rb
98
+ # frozen_string_literal: true
99
+
100
+ require 'sutty_migration/wordpress'
101
+ require 'sutty_migration/jekyll/document_creator'
102
+ require 'jekyll-write-and-commit-changes'
103
+
104
+ Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
105
+ wp = SuttyMigration::Wordpress.new(site: site, database: 'wordpress.sqlite3', prefix: 'wp_', url: 'https://wordpre.ss')
106
+
107
+ # Download all files
108
+ wp.download_all
109
+
110
+ wp.posts(layout: 'post').each do |post|
111
+ doc = Jekyll::Document.create(site: site, title: post[:title], date: post[:date], collection: 'posts')
112
+ doc.content = post[:content]
113
+ doc.save
114
+ end
115
+ end
116
+ ```
117
+
118
+ ### WordPress XML
119
+
120
+ If you have the XML dump from a WordPress site, you can migrate content
121
+ by writing a migration plugin.
122
+
123
+ ```ruby
124
+ # frozen_string_literal: true
125
+
126
+ require 'sutty_migration/jekyll/document_creator'
127
+ require 'sutty_migration/wordpress_xml'
128
+ require 'jekyll-write-and-commit-changes'
129
+ require 'securerandom'
130
+
131
+ # Run after reading the site
132
+ Jekyll::Hooks.register :site, :post_read do |site|
133
+ # Put the XML dump at _files/wordpress.xml
134
+ xml = SuttyMigration::WordpressXml.new site: site, file: '_files/wordpress.xml'
135
+
136
+ # Download all files
137
+ xml.attachments.values.map(&:download)
138
+
139
+ # Migrate posts. You can move metadata around and recover
140
+ # relationships or any info your theme requires.
141
+ xml.posts.values.each do |post|
142
+ # Update documents already migrated.
143
+ doc = Jekyll::Document.find_or_create(site: site, collection: locale, title: post.title, slug: post.slug, date: post.date)
144
+ # Don't change the UUIDv4
145
+ doc.data['uuid'] ||= SecureRandom.uuid
146
+ doc.data['draft'] = post.draft?
147
+ doc.data['layout'] = 'post'
148
+ doc.data['last_modified_at'] = post.last_modified_at
149
+
150
+ doc.data['categories'] = post.categories.map { |c| c[:title] }
151
+ doc.data['tags'] = post.tags.map { |t| t[:title] }
152
+
153
+ doc.data['author'] = post.author[:email]
154
+ doc.data['description'] = post.description
155
+ doc.content = post.content
156
+
157
+ doc.save
158
+ rescue => e
159
+ Jekyll.logger.warn "Couldn't migrate #{post.title}"
160
+ end
161
+
162
+ exit # Stop here
163
+ end
164
+ ```
165
+
166
+
69
167
  ## Contributing
70
168
 
71
169
  Bug reports and pull requests are welcome on 0xacab.org at
@@ -1,71 +1,3 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'securerandom'
4
- require 'fast_blank'
5
- require 'jekyll-write-and-commit-changes'
6
-
7
- Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
8
- documents = site.documents
9
-
10
- site.data['layouts']&.each do |name, layout|
11
- site.data.dig('migration', name)&.each do |row|
12
- row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
13
-
14
- if row['id']
15
- document = documents.find do |doc|
16
- doc.data['id'] == row['id']
17
- end
18
- end
19
-
20
- document ||=
21
- begin
22
- base = "#{row['date'] || Date.today.to_s}-#{Jekyll::Utils.slugify(row['title'], mode: 'latin')}.markdown"
23
- path = File.join(site.source, '_posts', base)
24
-
25
- raise ArgumentError, "Row #{row['id']} duplicates file #{base}" if File.exist? path
26
-
27
- doc = Jekyll::Document.new(path, site: site, collection: site.collections['posts'])
28
- site.collections['posts'] << doc
29
-
30
- doc
31
- end
32
-
33
- row.each do |attribute, value|
34
- row[attribute] =
35
- case layout.dig(attribute, 'type')
36
- when 'string' then value
37
- when 'text' then value
38
- when 'tel' then value
39
- when 'color' then value # TODO: validar
40
- when 'date' then Jekyll::Utils.parse_date(value)
41
- when 'email' then value # TODO: validar
42
- when 'url' then value # TODO: validar
43
- when 'content' then value
44
- when 'markdown_content' then value
45
- when 'markdown' then value
46
- when 'number' then value.to_i
47
- when 'order' then value.to_i
48
- when 'boolean' then !value.strip.empty?
49
- when 'array' then value.split(',').map(&:strip)
50
- # TODO: procesar los valores en base a los valores predefinidos
51
- when 'predefined_array' then value.split(',').map(&:strip)
52
- when 'image' then { 'path' => value, 'description' => '' }
53
- when 'file' then { 'path' => value, 'description' => '' }
54
- when 'geo' then %w[lat lng].zip(value.split(',', 2).map(&:to_f)).to_h
55
- when 'belongs_to' then value
56
- when 'has_many' then value.split(',').map(&:strip)
57
- when 'has_and_belongs_to_many' then value.split(',').map(&:strip)
58
- when 'related_posts' then value.split(',').map(&:strip)
59
- when 'locales' then value.split(',').map(&:strip)
60
- else value
61
- end
62
- end
63
-
64
- document.data['uuid'] ||= SecureRandom.uuid
65
- document.content = row.delete('content')
66
-
67
- document.data.merge! row
68
- document.save
69
- end
70
- end
71
- end
3
+ require_relative 'sutty_migration/data'
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Extend String so it can be checked for emptiness
4
+ require 'fast_blank'
5
+
6
+ # nil values always count as blank
7
+ class NilClass
8
+ def blank?
9
+ true
10
+ end
11
+
12
+ def present?
13
+ false
14
+ end
15
+ end
16
+
17
+ # A Time value is never blank
18
+ class Time
19
+ def blank?
20
+ false
21
+ end
22
+
23
+ def present?
24
+ true
25
+ end
26
+ end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'securerandom'
4
+ require_relative 'core_extensions'
5
+ require_relative 'jekyll/document_creator'
6
+
7
+ # Registers a plugin for converting CSV files into posts following
8
+ # Sutty's layout definition.
9
+ #
10
+ # If jekyll-write-and-commit-changes is enabled, documents will be saved
11
+ # on disk and commited is the build command is run with
12
+ # JEKYLL_ENV=production
13
+ Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
14
+ documents = site.documents
15
+
16
+ array_separator = site.config.fetch('array_separator', ',')
17
+
18
+ site.data['layouts']&.each do |name, layout|
19
+ site.data.dig('migration', name)&.each do |row|
20
+ row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
21
+ row['date'] ||= Time.now
22
+
23
+ unless row['id'].blank?
24
+ document = documents.find do |doc|
25
+ doc.data['id'] == row['id']
26
+ end
27
+ end
28
+
29
+ document ||= Jekyll::Document.create(site: site, collection: 'posts',
30
+ **row.slice(*%w[date slug title]).transform_keys(&:to_sym))
31
+
32
+ row.each do |attribute, value|
33
+ next unless value.blank?
34
+
35
+ row[attribute] =
36
+ case layout.dig(attribute, 'type')
37
+ when 'string' then value
38
+ when 'text' then value
39
+ when 'tel' then value
40
+ # TODO: validate
41
+ when 'color' then value
42
+ when 'date' then Jekyll::Utils.parse_date(value)
43
+ # TODO: validate
44
+ when 'email' then value
45
+ # TODO: validate
46
+ when 'url' then value
47
+ when 'content' then value
48
+ when 'markdown_content' then value
49
+ when 'markdown' then value
50
+ when 'number' then value.to_i
51
+ when 'order' then value.to_i
52
+ when 'boolean' then !value.strip.empty?
53
+ when 'array' then value.split(array_separator).map(&:strip)
54
+ # TODO: process values from the default array
55
+ when 'predefined_array' then value.split(array_separator).map(&:strip)
56
+ when 'image' then { 'path' => value, 'description' => '' }
57
+ when 'file' then { 'path' => value, 'description' => '' }
58
+ when 'geo' then %w[lat lng].zip(value.split(array_separator, 2).map(&:to_f)).to_h
59
+ when 'belongs_to' then value
60
+ when 'has_many' then value.split(array_separator).map(&:strip)
61
+ when 'has_and_belongs_to_many' then value.split(array_separator).map(&:strip)
62
+ when 'related_posts' then value.split(array_separator).map(&:strip)
63
+ when 'locales' then value.split(array_separator).map(&:strip)
64
+ else value
65
+ end
66
+ end
67
+
68
+ document.data['uuid'] ||= SecureRandom.uuid
69
+ document.content = row.delete('content')
70
+
71
+ document.data.merge! row
72
+ document.save if document.respond_to? :save
73
+ end
74
+ end
75
+
76
+ next unless site.respond_to?(:repository)
77
+ next unless ENV['JEKYLL_ENV'] == 'production'
78
+
79
+ site.repository.commit 'CSV Migration'
80
+ end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'jekyll/utils'
4
+ require_relative '../core_extensions'
5
+
6
+ module SuttyMigration
7
+ module Jekyll
8
+ module DocumentCreator
9
+ class DocumentExists < ArgumentError; end
10
+
11
+ def self.included(base)
12
+ base.class_eval do
13
+ class << self
14
+ # Creates a new document in a collection or fails if it already
15
+ # exists.
16
+ #
17
+ # @param :site [Jekyll::Site] Jekyll site
18
+ # @param :date [Time] Post date
19
+ # @param :title [String] Post title
20
+ # @param :slug [String] Post slug, slugified title if empty
21
+ # @param :collection [Jekyll::Collection,String] Collection label or collection
22
+ # @return [Jekyll::Document] A new document
23
+ def create(site:, date:, title:, collection:, slug: nil)
24
+ collection = site.collections[collection] if collection.is_a? String
25
+ slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
26
+ basename = "#{date.strftime('%F')}-#{slug}.markdown"
27
+ path = File.join(collection.directory, basename)
28
+
29
+ raise DocumentExists, "#{path} already exists" if File.exist? path
30
+
31
+ ::Jekyll::Document.new(path, site: site, collection: collection).tap do |document|
32
+ collection.docs << document
33
+ document.data['title'] = title
34
+ end
35
+ end
36
+
37
+ # Finds a document by its relative path or creates it if it
38
+ # doesn't exist. Helpful for idempotent migrations (create or
39
+ # update actions)
40
+ #
41
+ # @param :site [Jekyll::Site] Jekyll site
42
+ # @param :date [Time] Post date
43
+ # @param :title [String] Post title
44
+ # @param :slug [String] Post slug, slugified title if empty
45
+ # @param :collection [Jekyll::Collection,String] Collection label or collection
46
+ # @return [Jekyll::Document] The found document or a new one
47
+ def find_or_create(site:, date:, title:, collection:, slug: nil)
48
+ collection = site.collections[collection] if collection.is_a? String
49
+ slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
50
+ basename = "#{date.strftime('%F')}-#{slug}.markdown"
51
+ path = File.join(collection.relative_directory, basename)
52
+
53
+ return find(site: site, relative_path: path) if File.exist?(path)
54
+
55
+ create(site: site, date: date, title: title, slug: slug, collection: collection)
56
+ end
57
+
58
+ # Finds a document by its relative path
59
+ #
60
+ # @param :site [Jekyll::Site]
61
+ # @param :relative_path [String]
62
+ # @return [Jekyll::Document,Nil]
63
+ def find(site:, relative_path:)
64
+ indexed_documents_by_relative_path(site)[relative_path]
65
+ end
66
+
67
+ # Index documents by relative path for faster finding
68
+ #
69
+ # @param [Jekyll::Site]
70
+ # @return [Hash]
71
+ def indexed_documents_by_relative_path(site)
72
+ @indexed_documents_by_relative_path ||= site.documents.reduce({}) do |idx, doc|
73
+ idx.tap do |i|
74
+ i[doc.relative_path] = doc
75
+ end
76
+ end
77
+ end
78
+ end
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end
84
+
85
+ ::Jekyll::Document.include SuttyMigration::Jekyll::DocumentCreator
@@ -0,0 +1,282 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'time'
4
+ require 'securerandom'
5
+ require 'sequel'
6
+ require 'sqlite3'
7
+ require 'json'
8
+ require 'faraday'
9
+ require 'progressbar'
10
+ require 'jekyll/utils'
11
+
12
+ module SuttyMigration
13
+ # Brings posts and attachments from a SQLite3 database. You can
14
+ # convert a MySQL/MariaDB dump by using `mysql2sqlite`.
15
+ #
16
+ # It doesn't convert them into Jekyll posts but allows you to write a
17
+ # migration plugin where you can convert data by yourself. We may add
18
+ # this feature in the future.
19
+ class Wordpress
20
+ attr_reader :site, :prefix, :limit, :url, :wp, :database, :multisite
21
+
22
+ # @param :site [Jekyll::Site] Jekyll site
23
+ # @param :url [String] Wordpress site URL (must be up for downloads)
24
+ # @param :database [String] Database path, by default `_data/wordpress.sqlite3`
25
+ # @param :prefix [String] WP table prefix
26
+ # @param :limit [Integer] Page length
27
+ # @param :multisite [Boolean] Site is multisite
28
+ def initialize(site:, url:, database: nil, prefix: 'wp_', limit: 10, multisite: nil)
29
+ @site = site
30
+ @prefix = prefix.freeze
31
+ @limit = limit.freeze
32
+ @url = url.freeze
33
+ @database = database || File.join(site.source, '_data', 'wordpress.sqlite3')
34
+ @multisite = multisite
35
+ end
36
+
37
+ # Generate database connections for a multisite WP
38
+ #
39
+ # @return [Hash] { "ID" => SuttyMigration::Wordpress }
40
+ def blogs
41
+ @blogs ||= wp["select * from #{prefix}blogs"].to_a.map do |blog|
42
+ url = "https://#{blog[:domain]}#{blog[:path]}"
43
+ pfx = "#{prefix}#{blog[:blog_id]}_" if blog[:blog_id] > 1
44
+ pfx ||= prefix
45
+
46
+ [blog[:blog_id],
47
+ blog.merge(db: self.class.new(site: site, url: url, prefix: pfx, database: database, limit: limit,
48
+ multisite: self))]
49
+ end.to_h
50
+ end
51
+
52
+ def options
53
+ @options ||= wp["select option_name, option_value from #{prefix}options"].to_a.map(&:values).to_h.transform_keys(&:to_sym)
54
+ end
55
+
56
+ # Open the database.
57
+ #
58
+ # @return [Sequel::SQLite::Database]
59
+ def wp
60
+ @wp ||= Sequel.sqlite(database).tap do |db|
61
+ db.extension :pagination
62
+ end
63
+ end
64
+
65
+ # Download all attachments. Adds the local path to them.
66
+ #
67
+ # @param :progress [Boolean] Toggle progress bar
68
+ # @return [Nil]
69
+ def download_all(progress: true)
70
+ posts(layout: 'attachment').each do |attachment|
71
+ attachment[:front_matter]['file_path'] = download(url: attachment[:guid], progress: progress)
72
+ end
73
+ end
74
+
75
+ # Downloads a file if needed, optionally showing a progress bar.
76
+ #
77
+ # @param :url [String] File URL
78
+ # @param :progress [Boolean] Toggle progress bar
79
+ # @return [String] File local path
80
+ def download(url:, progress: true)
81
+ uri = URI(url)
82
+ dest = uri.path.sub(%r{\A/}, '')
83
+ full = File.join(site.source, dest)
84
+
85
+ return dest if File.exist? full
86
+
87
+ ::Jekyll.logger.info "Downloading #{dest}"
88
+
89
+ FileUtils.mkdir_p File.dirname(full)
90
+
91
+ File.open(full, 'w') do |f|
92
+ if progress
93
+ head = Faraday.head(url)
94
+ content_length = head.headers['content-length'].to_i
95
+ progress = ProgressBar.create(title: File.basename(dest), total: content_length, output: $stderr)
96
+ end
97
+
98
+ Faraday.get(url) do |req|
99
+ req.options.on_data = proc do |chunk, downloaded_bytes|
100
+ f.write chunk
101
+
102
+ if progress
103
+ progress.progress = downloaded_bytes > content_length ? content_length : downloaded_bytes
104
+ end
105
+ end
106
+ end
107
+ end
108
+
109
+ dest
110
+ end
111
+
112
+ # List post types
113
+ #
114
+ # @return [Array]
115
+ def layouts
116
+ @layouts ||= wp["select distinct post_type from #{prefix}posts"].to_a.map(&:values).flatten
117
+ end
118
+
119
+ # Finds all posts optionally filtering by post type. This is not
120
+ # the official Sequel syntax, but it retrieves metadata as objects
121
+ # with a single query (and a sub-query).
122
+ #
123
+ # @param :layout [String] Layout name, one of #layouts
124
+ # @param :with_meta [Boolean] Toggle metadata pulling and conversion
125
+ # @return [Array<Hash>]
126
+ def posts(**options)
127
+ unless options[:layout].blank? || layouts.include?(options[:layout])
128
+ raise ArgumentError, "#{options[:layout]} must be one of #{layouts.join(', ')}"
129
+ end
130
+
131
+ wp[post_query(**options)].each_page(limit).to_a.map(&:to_a).flatten.tap do |p|
132
+ p.map do |post|
133
+ # Sequel parses dates on localtime
134
+ post[:date] = ::Jekyll::Utils.parse_date(post[:date]) unless post[:date].blank?
135
+ unless post[:last_modified_at].blank?
136
+ post[:last_modified_at] =
137
+ ::Jekyll::Utils.parse_date(post[:last_modified_at])
138
+ end
139
+
140
+ post[:front_matter] =
141
+ begin
142
+ unless post[:front_matter].blank?
143
+ JSON.parse(post[:front_matter]).transform_keys(&:to_sym).transform_values do |v|
144
+ v.size == 1 ? v.first : v
145
+ end
146
+ end
147
+ rescue JSON::ParserError
148
+ {}
149
+ end
150
+ post[:terms] =
151
+ begin
152
+ unless post[:terms].blank?
153
+ JSON.parse(post[:terms]).transform_keys(&:to_sym).transform_values do |v|
154
+ v.size == 1 ? v.first : v
155
+ end
156
+ end
157
+ rescue JSON::ParserError
158
+ {}
159
+ end
160
+ end
161
+ end
162
+ end
163
+
164
+ # Brings all users.
165
+ #
166
+ # @param :with_meta [Boolean] include metadata
167
+ # @return [Array]
168
+ def users(**options)
169
+ options[:with_meta] = true unless options.key? :with_meta
170
+
171
+ wp[user_query(**options)].each_page(limit).to_a.map(&:to_a).flatten.tap do |u|
172
+ next unless options[:with_meta]
173
+
174
+ u.map do |user|
175
+ user[:meta] = JSON.parse(user[:meta]).transform_keys(&:to_sym) unless user[:meta].blank?
176
+ end
177
+ end
178
+ end
179
+
180
+ private
181
+
182
+ # Finds all users. If it's a multisite WP, we need to check the
183
+ # main table.
184
+ #
185
+ # @param :with_meta [Boolean] include metadata
186
+ # @return [String]
187
+ def user_query(with_meta: true)
188
+ pfx = multisite&.prefix || prefix
189
+
190
+ <<~EOQ
191
+ select
192
+ u.*
193
+ #{', json_group_object(m.meta_key, m.meta_value) as meta' if with_meta}
194
+ from #{pfx}users as u
195
+ #{"left join #{pfx}usermeta as m on m.user_id = u.id" if with_meta}
196
+ group by u.id
197
+ EOQ
198
+ end
199
+
200
+ # Query for posts, optionally bringing metadata as JSON objects.
201
+ #
202
+ # @param :layout [String] Layout name
203
+ # @param :with_meta [Boolean] Query metadata
204
+ # @return [String]
205
+ def post_query(layout: nil, with_meta: true)
206
+ <<~EOQ
207
+ select
208
+ p.ID as id,
209
+ strftime('%Y-%m-%d %H:%M:%S UTC', p.post_date_gmt) as date,
210
+ strftime('%Y-%m-%d %H:%M:%S UTC', p.post_modified_gmt) as last_modified_at,
211
+ p.post_author as author,
212
+ p.post_type as layout,
213
+ p.post_name as slug,
214
+ p.post_title as title,
215
+ p.post_content as content,
216
+ p.post_excerpt as excerpt,
217
+ p.post_status as status,
218
+ p.comment_status as comment_status,
219
+ p.ping_status as ping_status,
220
+ p.post_password as password,
221
+ p.to_ping as to_ping,
222
+ p.pinged as pinged,
223
+ p.post_content_filtered as content_filtered,
224
+ p.post_parent as parent,
225
+ p.guid as guid,
226
+ p.menu_order as menu_order,
227
+ p.post_mime_type as mime_type,
228
+ p.comment_count as comment_count
229
+ #{', f.front_matter as front_matter' if with_meta}
230
+ #{', t.terms as terms' if with_meta}
231
+ from #{prefix}posts as p
232
+ #{"left join (#{meta_query}) as f on f.post_id = p.ID" if with_meta}
233
+ #{"left join (#{terms_query}) as t on t.post_id = p.ID" if with_meta}
234
+ #{"where p.post_type = '#{layout}'" if layout}
235
+ group by p.ID
236
+ EOQ
237
+ end
238
+
239
+ # Recover the post meta as a JSON object with multiple values
240
+ # converted to arrays
241
+ #
242
+ # @return [String]
243
+ def meta_query
244
+ <<~EOQ
245
+ select
246
+ post_id,
247
+ json_group_object(meta_key, json(meta_values)) as front_matter
248
+ from (
249
+ select
250
+ post_id,
251
+ meta_key,
252
+ json_group_array(meta_value) as meta_values
253
+ from #{prefix}postmeta
254
+ group by post_id, meta_key
255
+ )
256
+ group by post_id
257
+ EOQ
258
+ end
259
+
260
+ # Term taxonomy query
261
+ #
262
+ # @param :layout [String] Layout name
263
+ # @return [String]
264
+ def terms_query
265
+ <<~EOQ
266
+ select
267
+ post_id,
268
+ json_group_object(taxonomy, json(terms)) as terms
269
+ from (
270
+ select
271
+ r.object_id as post_id,
272
+ tt.taxonomy,
273
+ json_group_array(t.name) as terms
274
+ from #{prefix}term_relationships as r
275
+ left join #{prefix}term_taxonomy as tt on tt.term_taxonomy_id = r.term_taxonomy_id
276
+ left join #{prefix}terms as t on t.term_id = tt.term_id
277
+ group by r.object_id)
278
+ group by post_id
279
+ EOQ
280
+ end
281
+ end
282
+ end
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require_relative 'wordpress_xml/post'
5
+ require_relative 'wordpress_xml/attachment'
6
+
7
+ module SuttyMigration
8
+ # Understands the XML dump generated by Wordpress and creates
9
+ # Jekyll::Documents
10
+ class WordpressXml
11
+ attr_reader :site, :file, :xml
12
+
13
+ # @param :site [Jekyll::Site] Jekyll site
14
+ # @param :file [String] File path
15
+ def initialize(site:, file:)
16
+ @site = site
17
+ @file = file
18
+ @xml = Nokogiri::XML File.read(file)
19
+
20
+ # Make things easier by removing namespaces.
21
+ xml.remove_namespaces!
22
+ end
23
+
24
+ def inspect
25
+ '#<SuttyMigration::WordpressXml>'
26
+ end
27
+
28
+ # Site URL
29
+ #
30
+ # @return [String]
31
+ def url
32
+ @url ||= attribute_value(xml, 'channel > link')
33
+ end
34
+
35
+ # Site title
36
+ #
37
+ # @return [String]
38
+ def title
39
+ @title ||= attribute_value(xml, 'channel > title')
40
+ end
41
+
42
+ # Description
43
+ #
44
+ # @return [String]
45
+ def description
46
+ @description ||= attribute_value(xml, 'channel > description')
47
+ end
48
+
49
+ # Language
50
+ #
51
+ # TODO: Migrate multilanguage sites.
52
+ #
53
+ # @return [String]
54
+ def language
55
+ @language ||= attribute_value(xml, 'channel > language')
56
+ end
57
+
58
+ # Authors with attributes, indexed by author email.
59
+ #
60
+ # @return [Hash]
61
+ def authors
62
+ @authors ||= xml.css('channel > author').map do |author|
63
+ {
64
+ attribute_value(author, 'author_email') => {
65
+ id: attribute_value(author, 'author_id').to_i,
66
+ display_name: attribute_value(author, 'author_display_name'),
67
+ first_name: attribute_value(author, 'author_first_name'),
68
+ last_name: attribute_value(author, 'author_last_name'),
69
+ email: attribute_value(author, 'author_email')
70
+
71
+ }
72
+ }
73
+ end.reduce(&:merge)
74
+ end
75
+
76
+ # Categories with attributes, indexed by slug ("nicename")
77
+ #
78
+ # @return [Hash]
79
+ def categories
80
+ @categories ||= xml.css('channel > category').map do |category|
81
+ {
82
+ attribute_value(category, 'category_nicename') => {
83
+ id: attribute_value(category, 'term_id').to_i,
84
+ title: attribute_value(category, 'cat_name'),
85
+ parent: attribute_value(category, 'category_parent'),
86
+ slug: attribute_value(category, 'category_nicename')
87
+ }
88
+ }
89
+ end.reduce(&:merge)
90
+ end
91
+
92
+ # Tags with attributes, indexed by slug
93
+ #
94
+ # @return [Hash]
95
+ def tags
96
+ @tags ||= xml.css('channel > tag').map do |tag|
97
+ {
98
+ attribute_value(tag, 'tag_slug') => {
99
+ id: attribute_value(tag, 'term_id').to_i,
100
+ title: attribute_value(tag, 'tag_name'),
101
+ slug: attribute_value(tag, 'tag_slug')
102
+ }
103
+ }
104
+ end.reduce(&:merge)
105
+ end
106
+
107
+ # Posts, indexed by ID
108
+ #
109
+ # @return [Hash]
110
+ def posts
111
+ @posts ||= items_find_by('post_type', 'post').map do |post|
112
+ { attribute_value(post, 'post_id').to_i => Post.new(wordpress: self, item: post) }
113
+ end.reduce(&:merge)
114
+ end
115
+
116
+ # Pages, indexed by ID
117
+ #
118
+ # @return [Hash]
119
+ def pages
120
+ @pages ||= items_find_by('post_type', 'page').map do |page|
121
+ { attribute_value(page, 'post_id').to_i => Post.new(wordpress: self, item: page) }
122
+ end.reduce(&:merge)
123
+ end
124
+
125
+ # Attachments, indexed by ID
126
+ #
127
+ # @return [Hash]
128
+ def attachments
129
+ @attachments ||= items_find_by('post_type', 'attachment').map do |attachment|
130
+ { attribute_value(attachment, 'post_id').to_i => Attachment.new(wordpress: self, item: attachment) }
131
+ end.reduce(&:merge)
132
+ end
133
+
134
+ # Find items by attribute and value
135
+ #
136
+ # @param [String] Attribute name
137
+ # @param [String] Attribute value
138
+ # @return [Nokogiri::NodeSet]
139
+ def items_find_by(attribute, value)
140
+ xml.css('channel > item').select do |item|
141
+ attribute_value(item, attribute) == value
142
+ end
143
+ end
144
+
145
+ # Get element's attribute value
146
+ #
147
+ # @param [Nokogiri::XML::Element]
148
+ # @param [String]
149
+ # @return [String]
150
+ def attribute_value(element, attribute)
151
+ element.at_css(attribute).text
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'post'
4
+ require 'php-serialize'
5
+ require 'faraday'
6
+ require 'progressbar'
7
+
8
+ module SuttyMigration
9
+ class WordpressXml
10
+ # Represents an attachment or uploaded file.
11
+ class Attachment < Post
12
+ # File URL
13
+ #
14
+ # @return [String]
15
+ def attachment_url
16
+ @attachment_url ||= attribute_value 'attachment_url'
17
+ end
18
+
19
+ # File destination
20
+ #
21
+ # @return [String]
22
+ def dest
23
+ @dest ||= URI(attachment_url).path.sub(%r{\A/}, '')
24
+ end
25
+
26
+ # Metadata, with file information as a Hash
27
+ #
28
+ # @return [Hash]
29
+ def meta
30
+ super.tap do |m|
31
+ m['_wp_attachment_metadata'] = PHP.unserialize m['_wp_attachment_metadata']
32
+ end
33
+ end
34
+
35
+ # Download the file if it doesn't exist. Optionally show a
36
+ # progress bar.
37
+ #
38
+ # @param :progress [Boolean]
39
+ # @return [Boolean]
40
+ def download(progress: true)
41
+ return true if File.exist? dest
42
+
43
+ ::Jekyll.logger.info "Downloading #{dest}"
44
+
45
+ FileUtils.mkdir_p File.dirname(dest)
46
+
47
+ File.open(dest, 'w') do |f|
48
+ if progress
49
+ head = Faraday.head(attachment_url)
50
+ content_length = head.headers['content-length'].to_i
51
+ progress = ProgressBar.create(title: File.basename(dest), total: content_length, output: $stderr)
52
+ end
53
+
54
+ Faraday.get(attachment_url) do |req|
55
+ req.options.on_data = proc do |chunk, downloaded_bytes|
56
+ f.write chunk
57
+
58
+ if progress
59
+ progress.progress = downloaded_bytes > content_length ? content_length : downloaded_bytes
60
+ end
61
+ end
62
+ end
63
+ end
64
+
65
+ File.exist? dest
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wordpress_formatting/wpautop'
4
+ require 'jekyll/utils'
5
+
6
+ module SuttyMigration
7
+ class WordpressXml
8
+ # Represents a WordPress post
9
+ class Post
10
+ attr_reader :wordpress, :item
11
+
12
+ # @param :wordpress [SuttyMigration::WordpressXml]
13
+ # @param :item [Nokogiri::XML::Element]
14
+ def initialize(wordpress:, item:)
15
+ @wordpress = wordpress
16
+ @item = item
17
+ end
18
+
19
+ def inspect
20
+ "#<SuttyMigration::WordpressXml::Post title=\"#{title}\">"
21
+ end
22
+
23
+ # Post ID
24
+ #
25
+ # @return [Integer]
26
+ def id
27
+ @id ||= attribute_value('post_id').to_i
28
+ end
29
+
30
+ # Permalink. Absolute URL to the post.
31
+ #
32
+ # @return [String]
33
+ def permalink
34
+ @permalink ||= attribute_value('link').sub(wordpress.url, '')
35
+ end
36
+
37
+ # Title
38
+ #
39
+ # @return [String]
40
+ def title
41
+ @title ||= attribute_value('title')
42
+ end
43
+
44
+ # Description
45
+ #
46
+ # @return [String]
47
+ def description
48
+ @description ||= attribute_value('description')
49
+ end
50
+
51
+ # Slug ("post name")
52
+ #
53
+ # @return [String]
54
+ def slug
55
+ @slug ||= attribute_value('post_name')
56
+ end
57
+
58
+ # Publication date.
59
+ #
60
+ # WordPress can store this date in three different fields and
61
+ # sometimes they come empty or invalid.
62
+ #
63
+ # @return [Time]
64
+ def date
65
+ @date ||= %w[pubDate post_date_gmt post_date].map do |date_attr|
66
+ ::Jekyll::Utils.parse_date attribute_value(date_attr)
67
+ rescue StandardError
68
+ end.compact.first
69
+ end
70
+
71
+ # Modification date.
72
+ #
73
+ # @return [Time]
74
+ def last_modified_at
75
+ @last_modified_at ||= ::Jekyll::Utils.parse_date attribute_value('post_modified_gmt')
76
+ end
77
+
78
+ # Content as HTML, with site URL removed.
79
+ #
80
+ # @return [String]
81
+ def content
82
+ @content ||= WordpressFormatting::Wpautop.wpautop(attribute_value('encoded')).gsub(
83
+ / (href|src)="#{wordpress.url}/, ' \\1="'
84
+ )
85
+ end
86
+
87
+ # Author attributes.
88
+ #
89
+ # @return [Hash]
90
+ def author
91
+ @author ||= wordpress.authors[attribute_value('creator')]
92
+ end
93
+
94
+ # Post password. Use with jekyll-crypto.
95
+ #
96
+ # @return [String]
97
+ def password
98
+ @password ||= attribute_value 'post_password'
99
+ end
100
+
101
+ # Tags with attributes.
102
+ #
103
+ # @return [Hash]
104
+ def tags
105
+ @tags ||= item.css('category').select do |c|
106
+ c[:domain] == 'post_tag'
107
+ end.map do |c|
108
+ wordpress.tags[c[:nicename]]
109
+ end
110
+ end
111
+
112
+ # Categories with attributes.
113
+ #
114
+ # @return [Hash]
115
+ def categories
116
+ @categories ||= item.css('category').select do |c|
117
+ c[:domain] == 'category'
118
+ end.map do |c|
119
+ wordpress.categories[c[:nicename]]
120
+ end
121
+ end
122
+
123
+ # Metadata. Plugins store useful information here. Duplicated
124
+ # keys are returned as an Array of values.
125
+ #
126
+ # @return [Hash]
127
+ def meta
128
+ @meta ||= {}.tap do |meta|
129
+ item.css('postmeta').each do |m|
130
+ key = m.css('meta_key').text
131
+ value = m.css('meta_value').text
132
+
133
+ case meta[key]
134
+ when nil then meta[key] = value
135
+ when String then meta[key] = [meta[key], value]
136
+ when Array then meta[key] << value
137
+ end
138
+ end
139
+ end
140
+ end
141
+
142
+ # Order. Higher are sorted on top by jekyll-order.
143
+ #
144
+ # @return [Integer]
145
+ def order
146
+ @order ||= attribute_value 'is_sticky'
147
+ end
148
+
149
+ # Publication status
150
+ #
151
+ # @return [Boolean]
152
+ def published?
153
+ @published ||= attribute_value('status') == 'publish'
154
+ end
155
+
156
+ # Publication status
157
+ #
158
+ # @return [Boolean]
159
+ def draft?
160
+ @draft ||= attribute_value('status') == 'draft'
161
+ end
162
+
163
+ # Get a value from the attribute
164
+ #
165
+ # @return [String]
166
+ def attribute_value(key)
167
+ item.at_css(key).text
168
+ end
169
+ end
170
+ end
171
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sutty-migration
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - f
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-28 00:00:00.000000000 Z
11
+ date: 2021-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jekyll
@@ -52,6 +52,118 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: faraday
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.4'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.4'
69
+ - !ruby/object:Gem::Dependency
70
+ name: progressbar
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.11'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.11'
83
+ - !ruby/object:Gem::Dependency
84
+ name: sqlite3
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.4'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.4'
97
+ - !ruby/object:Gem::Dependency
98
+ name: sequel
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '5.45'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '5.45'
111
+ - !ruby/object:Gem::Dependency
112
+ name: wordpress-formatting
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 0.1.0
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 0.1.0
125
+ - !ruby/object:Gem::Dependency
126
+ name: nokogiri
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 1.12.0
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 1.12.0
139
+ - !ruby/object:Gem::Dependency
140
+ name: php-serialize
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 1.3.0
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 1.3.0
153
+ - !ruby/object:Gem::Dependency
154
+ name: pry
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
55
167
  description: Takes datafiles and converts them into posts
56
168
  email:
57
169
  - f@sutty.nl
@@ -64,7 +176,13 @@ files:
64
176
  - LICENSE.txt
65
177
  - README.md
66
178
  - lib/sutty-migration.rb
67
- - lib/wordpress.rb
179
+ - lib/sutty_migration/core_extensions.rb
180
+ - lib/sutty_migration/data.rb
181
+ - lib/sutty_migration/jekyll/document_creator.rb
182
+ - lib/sutty_migration/wordpress.rb
183
+ - lib/sutty_migration/wordpress_xml.rb
184
+ - lib/sutty_migration/wordpress_xml/attachment.rb
185
+ - lib/sutty_migration/wordpress_xml/post.rb
68
186
  homepage: https://0xacab.org/sutty/jekyll/sutty-migration
69
187
  licenses:
70
188
  - GPL-3.0
data/lib/wordpress.rb DELETED
@@ -1,174 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # Debug
4
- require 'pry'
5
- # Generar UUIDs
6
- require 'securerandom'
7
- # Traer resultados de la base de datos
8
- require 'sequel'
9
- require 'sqlite3'
10
- require 'json'
11
- # Limpieza de contenido
12
- require 'loofah'
13
- require 'rails/html/scrubbers'
14
- require 'rails/html/sanitizer'
15
- require 'reverse_markdown'
16
- # Descargar archivos
17
- require 'faraday'
18
-
19
- class Wordpress
20
- attr_reader :site, :prefix, :limit, :url
21
-
22
- def initialize(site:, url:, prefix: 'wp_', limit: 10)
23
- @site = site
24
- @prefix = prefix.freeze
25
- @limit = limit.freeze
26
- @url = url.freeze
27
-
28
- # Conectarse a la base de datos
29
- @wp = Sequel.sqlite(File.join(site.source, '_data', 'wordpress', 'post.sqlite3'))
30
- # Las funciones de JSON usan mucha CPU, vamos a traer de a pocos
31
- # registros.
32
- @wp.extension :pagination
33
- end
34
-
35
- def download(file)
36
- dest = 'wp-content/uploads/' + file
37
- full = File.join(site.source, dest)
38
-
39
- return dest if File.exist? full
40
-
41
- Jekyll.logger.info "Downloading #{dest}"
42
-
43
- FileUtils.mkdir_p File.dirname(full)
44
-
45
- File.open(full, 'w') do |f|
46
- Faraday.get(url + '/' + dest) do |req|
47
- req.options.on_data = Proc.new do |chunk, _|
48
- f.write chunk
49
- end
50
- end
51
- end
52
-
53
- dest
54
- end
55
-
56
- # Obtiene todos los posts opcionalmente filtrando por tipo de post.
57
- # No es la forma oficial de Sequel pero no tenemos tiempo de
58
- # aprenderla específicamente y además tenemos las opciones en formato
59
- # JSON que no estarían soportadas.
60
- def posts(layout: nil)
61
- query = post_query.dup
62
- query += " where post_type = '#{layout}'" if layout
63
- query += ' group by posts.ID'
64
-
65
- @wp[query].each_page(limit)
66
- end
67
-
68
- def meta(id:)
69
- @wp[meta_query(id: id)].to_a
70
- end
71
-
72
- private
73
-
74
- # Obtener todos los posts, json_objectagg requiere mariadb 10.5
75
- def post_query
76
- @post_query ||= <<~EOQ
77
- select ID as id,
78
- post_title as title,
79
- post_name as slug,
80
- post_type as layout,
81
- strftime('%Y-%m-%d', post_date) as date,
82
- post_status as status,
83
- post_content as content,
84
- json_group_object(meta_key, meta_value) as data
85
- from #{prefix}posts as posts
86
- left join #{prefix}postmeta as frontmatter
87
- on posts.ID = frontmatter.post_id
88
- EOQ
89
- end
90
-
91
- def meta_query(id:)
92
- <<~EOQ
93
- SELECT
94
- terms.name AS `name`,
95
- ttax.taxonomy AS `type`,
96
- ttax.parent AS `parent`,
97
- ttax.term_id AS `id`
98
- FROM
99
- #{prefix}terms AS `terms`,
100
- #{prefix}term_relationships AS `trels`,
101
- #{prefix}term_taxonomy AS `ttax`
102
- WHERE
103
- trels.object_id = '#{id}' AND
104
- trels.term_taxonomy_id = ttax.term_taxonomy_id AND
105
- terms.term_id = ttax.term_id
106
- EOQ
107
- end
108
- end
109
-
110
- # Antes de generar el sitio vamos a leer todos los artículos desde la
111
- # base de datos y generarlos localmente.
112
- Jekyll::Hooks.register :site, :post_read do |site|
113
- wp = Wordpress.new(site: site,
114
- url: site.config.dig('wordpress', 'url'),
115
- prefix: site.config.dig('wordpress', 'prefix'))
116
-
117
- collection = site.collections['posts']
118
- ascii_re = Regexp.new("\P{ASCII}").freeze
119
- sanitizer = Rails::Html::SafeListSanitizer.new
120
-
121
- # Traer todas las imágenes cargadas y descargarlas
122
- attachments = wp.posts(layout: 'attachment').map do |page|
123
- page.map do |attachment|
124
- attachment[:data] = JSON.parse(attachment[:data]) unless attachment[:data].nil?
125
- file = attachment.dig(:data, '_wp_attached_file')
126
-
127
- next unless file
128
-
129
- dest = wp.download(file)
130
-
131
- # Tener un mapa de IDs y archivos destino
132
- [ attachment[:id], dest ]
133
- end
134
- end.compact.flatten(1).to_h
135
-
136
- %w[post page].each do |type|
137
- wp.posts(layout: type).each do |page|
138
- page.each do |post|
139
- # Convertir los datos extra en un Hash
140
- post[:data] = JSON.parse(post[:data]) unless post[:data].nil?
141
- post[:slug] = Jekyll::Utils.slugify(post[:title], mode: 'latin') if post[:slug].empty?
142
- post[:meta] = wp.meta id: post[:id]
143
-
144
- path = File.join(site.source, '_posts', post.slice(:date, :slug).values.join('-') + '.markdown')
145
-
146
- if File.exist? path
147
- Jekyll.logger.info "#{path} ya fue migrado, actualizando"
148
-
149
- doc = site.documents.find do |d|
150
- d['id'] == post[:id]
151
- end
152
- else
153
- # Crear un post nuevo y agregarlo a la colección
154
- collection.docs << doc = Jekyll::Document.new(path, site: site, collection: collection)
155
- doc.data['uuid'] = SecureRandom.uuid
156
- end
157
-
158
- thumbnail = post.dig(:data, '_thumbnail_id')&.to_i
159
-
160
- doc.data['layout'] = type
161
- doc.data['title'] = post[:title]
162
- doc.data['draft'] = post[:status] != 'publish'
163
- doc.data['id'] = post[:id]
164
- doc.data['date'] = Jekyll::Utils.parse_date(post[:date])
165
- doc.data['tags'] = post[:meta].select { |k| k[:type] == 'post_tag' }.map { |k| k[:name] }
166
- doc.data['categories'] = post[:meta].select { |k| k[:type] == 'category' }.map { |k| k[:name] }
167
- doc.data['image'] = attachments[thumbnail] if thumbnail
168
-
169
- doc.content = ReverseMarkdown.convert(sanitizer.sanitize(post[:content]))
170
- doc.save
171
- end
172
- end
173
- end
174
- end