sutty-migration 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f5a175d5ab6dd98eb4bed7ac987961ce9aaae15355c3eeb8e191805a6192ae34
4
- data.tar.gz: f677b5f7945ba06403239197798d1b37eaea975c2057734c16484a0c75204f48
3
+ metadata.gz: 5486653e0e1eb13f5c4c4f85235c875c782fee7be37a1bee9e4cdd84d5879d0a
4
+ data.tar.gz: '096ab9a992ad5b4cf36bb765a4eb99bd4ac9f2fc35ec25737906eb7c3abc8fdf'
5
5
  SHA512:
6
- metadata.gz: 9cb1bf01c37e40036dffdd19b14b78fbdf538af45625867eb6cbd45ab66bcb468a91b11a13ab86ca83e2d8692235ed0c6d4d919186d3a8713f9c81723c7978a6
7
- data.tar.gz: a3fc41c535e1a028526beb8904be1a72b2eee49473e4036059e7b0d4bbeff9b8670bf20b564636709605200aeede0c928bf9c9613146339a01803d396c570c05
6
+ metadata.gz: e94245fd5af90a7411b842e13c44a5f85bbbe2544449de98eaa9c52dbb70f095938754bb65dea382f5275df26b6753c37c5cea24c564ff8f98ad6a0f29406e0e
7
+ data.tar.gz: f415da3e9c4ebee1ec8a6101676ae17a319d05ee248c8360d834d7f321190e791eb8274eb6fa0c5fcc74be5c1d2e1b77195894cb9179c3e33626bd090636327b
data/README.md CHANGED
@@ -23,6 +23,7 @@ Add the plugin to your `_config.yml`:
23
23
  ```yaml
24
24
  plugins:
25
25
  - sutty-migration
26
+ array_separator: ','
26
27
  ```
27
28
 
28
29
  Compile a CSV file with the following required fields:
@@ -114,6 +115,55 @@ Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
114
115
  end
115
116
  ```
116
117
 
118
+ ### WordPress XML
119
+
120
+ If you have the XML dump from a WordPress site, you can migrate content
121
+ by writing a migration plugin.
122
+
123
+ ```ruby
124
+ # frozen_string_literal: true
125
+
126
+ require 'sutty_migration/jekyll/document_creator'
127
+ require 'sutty_migration/wordpress_xml'
128
+ require 'jekyll-write-and-commit-changes'
129
+ require 'securerandom'
130
+
131
+ # Run after reading the site
132
+ Jekyll::Hooks.register :site, :post_read do |site|
133
+ # Put the XML dump at _files/wordpress.xml
134
+ xml = SuttyMigration::WordpressXml.new site: site, file: '_files/wordpress.xml'
135
+
136
+ # Download all files
137
+ xml.attachments.values.map(&:download)
138
+
139
+ # Migrate posts. You can move metadata around and recover
140
+ # relationships or any info your theme requires.
141
+ xml.posts.values.each do |post|
142
+ # Update documents already migrated.
143
+ doc = Jekyll::Document.find_or_create(site: site, collection: locale, title: post.title, slug: post.slug, date: post.date)
144
+ # Don't change the UUIDv4
145
+ d.data['uuid'] ||= SecureRandom.uuid
146
+ d.data['draft'] = post.draft?
147
+ d.data['layout'] = 'post'
148
+ d.data['last_modified_at'] = post.last_modified_at
149
+
150
+ d.data['categories'] = post.categories.map { |c| c[:title] }
151
+ d.data['tags'] = post.tags.map { |t| t[:title] }
152
+
153
+ d.data['author'] = post.author[:email]
154
+ d.data['description'] = post.description
155
+ d.content = post.content
156
+
157
+ doc.save
158
+ rescue => e
159
+ Jekyll.logger.warn "Couldn't migrate #{post.title}"
160
+ end
161
+
162
+ exit # Stop here
163
+ end
164
+ ```
165
+
166
+
117
167
  ## Contributing
118
168
 
119
169
  Bug reports and pull requests are welcome on 0xacab.org at
@@ -13,6 +13,8 @@ require_relative 'jekyll/document_creator'
13
13
  Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
14
14
  documents = site.documents
15
15
 
16
+ array_separator = site.config.fetch('array_separator', ',')
17
+
16
18
  site.data['layouts']&.each do |name, layout|
17
19
  site.data.dig('migration', name)&.each do |row|
18
20
  row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
@@ -24,41 +26,42 @@ Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
24
26
  end
25
27
  end
26
28
 
27
- document ||= Jekyll::Document.create(site: site, collection: 'posts', **row.slice(*%w[date slug title]).transform_keys(&:to_sym))
29
+ document ||= Jekyll::Document.create(site: site, collection: 'posts',
30
+ **row.slice(*%w[date slug title]).transform_keys(&:to_sym))
28
31
 
29
32
  row.each do |attribute, value|
30
33
  next unless value.blank?
31
34
 
32
35
  row[attribute] =
33
36
  case layout.dig(attribute, 'type')
34
- when 'string' then value
35
- when 'text' then value
36
- when 'tel' then value
37
- # TODO: validate
38
- when 'color' then value
39
- when 'date' then Jekyll::Utils.parse_date(value)
40
- # TODO: validate
41
- when 'email' then value
42
- # TODO: validate
43
- when 'url' then value
44
- when 'content' then value
45
- when 'markdown_content' then value
46
- when 'markdown' then value
47
- when 'number' then value.to_i
48
- when 'order' then value.to_i
49
- when 'boolean' then !value.strip.empty?
50
- when 'array' then value.split(',').map(&:strip)
51
- # TODO: process values from the default array
52
- when 'predefined_array' then value.split(',').map(&:strip)
53
- when 'image' then { 'path' => value, 'description' => '' }
54
- when 'file' then { 'path' => value, 'description' => '' }
55
- when 'geo' then %w[lat lng].zip(value.split(',', 2).map(&:to_f)).to_h
56
- when 'belongs_to' then value
57
- when 'has_many' then value.split(',').map(&:strip)
58
- when 'has_and_belongs_to_many' then value.split(',').map(&:strip)
59
- when 'related_posts' then value.split(',').map(&:strip)
60
- when 'locales' then value.split(',').map(&:strip)
61
- else value
37
+ when 'string' then value
38
+ when 'text' then value
39
+ when 'tel' then value
40
+ # TODO: validate
41
+ when 'color' then value
42
+ when 'date' then Jekyll::Utils.parse_date(value)
43
+ # TODO: validate
44
+ when 'email' then value
45
+ # TODO: validate
46
+ when 'url' then value
47
+ when 'content' then value
48
+ when 'markdown_content' then value
49
+ when 'markdown' then value
50
+ when 'number' then value.to_i
51
+ when 'order' then value.to_i
52
+ when 'boolean' then !value.strip.empty?
53
+ when 'array' then value.split(array_separator).map(&:strip)
54
+ # TODO: process values from the default array
55
+ when 'predefined_array' then value.split(array_separator).map(&:strip)
56
+ when 'image' then { 'path' => value, 'description' => '' }
57
+ when 'file' then { 'path' => value, 'description' => '' }
58
+ when 'geo' then %w[lat lng].zip(value.split(array_separator, 2).map(&:to_f)).to_h
59
+ when 'belongs_to' then value
60
+ when 'has_many' then value.split(array_separator).map(&:strip)
61
+ when 'has_and_belongs_to_many' then value.split(array_separator).map(&:strip)
62
+ when 'related_posts' then value.split(array_separator).map(&:strip)
63
+ when 'locales' then value.split(array_separator).map(&:strip)
64
+ else value
62
65
  end
63
66
  end
64
67
 
@@ -7,31 +7,75 @@ module SuttyMigration
7
7
  module Jekyll
8
8
  module DocumentCreator
9
9
  class DocumentExists < ArgumentError; end
10
+
10
11
  def self.included(base)
11
12
  base.class_eval do
13
+ class << self
14
+ # Creates a new document in a collection or fails if it already
15
+ # exists.
16
+ #
17
+ # @param :site [Jekyll::Site] Jekyll site
18
+ # @param :date [Time] Post date
19
+ # @param :title [String] Post title
20
+ # @param :slug [String] Post slug, slugified title if empty
21
+ # @param :collection [Jekyll::Collection,String] Collection label or collection
22
+ # @return [Jekyll::Document] A new document
23
+ def create(site:, date:, title:, collection:, slug: nil)
24
+ collection = site.collections[collection] if collection.is_a? String
25
+ slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
26
+ basename = "#{date.strftime('%F')}-#{slug}.markdown"
27
+ path = File.join(collection.directory, basename)
12
28
 
13
- # Creates a new document in a collection or fails if it already
14
- # exists.
15
- #
16
- # @param :site [Jekyll::Site] Jekyll site
17
- # @param :date [Time] Post date
18
- # @param :title [String] Post title
19
- # @param :slug [String] Post slug, slugified title if empty
20
- # @param :collection [Jekyll::Collection,String] Collection label or collection
21
- # @return [Jekyll::Document] A new document
22
- def self.create(site:, date:, title:, slug: nil, collection:)
23
- collection = site.collections[collection] if collection.is_a? String
24
- slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
25
- basename = "#{date.strftime('%F')}-#{slug}.markdown"
26
- path = File.join(collection.directory, basename)
27
-
28
- raise DocumentExists, "#{path} already exists" if File.exist? path
29
-
30
- ::Jekyll::Document.new(path, site: site, collection: collection).tap do |document|
31
- collection.docs << document
32
- document.data['title'] = title
33
- end
34
- end
29
+ raise DocumentExists, "#{path} already exists" if File.exist? path
30
+
31
+ ::Jekyll::Document.new(path, site: site, collection: collection).tap do |document|
32
+ collection.docs << document
33
+ document.data['title'] = title
34
+ end
35
+ end
36
+
37
+ # Finds a document by its relative path or creates it if it
38
+ # doesn't exist. Helpful for idempotent migrations (create or
39
+ # update actions)
40
+ #
41
+ # @param :site [Jekyll::Site] Jekyll site
42
+ # @param :date [Time] Post date
43
+ # @param :title [String] Post title
44
+ # @param :slug [String] Post slug, slugified title if empty
45
+ # @param :collection [Jekyll::Collection,String] Collection label or collection
46
+ # @return [Jekyll::Document] The found document or a new one
47
+ def find_or_create(site:, date:, title:, collection:, slug: nil)
48
+ collection = site.collections[collection] if collection.is_a? String
49
+ slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
50
+ basename = "#{date.strftime('%F')}-#{slug}.markdown"
51
+ path = File.join(collection.relative_directory, basename)
52
+
53
+ return find(site: site, relative_path: path) if File.exist?(path)
54
+
55
+ create(site: site, date: date, title: title, slug: slug, collection: collection)
56
+ end
57
+
58
+ # Finds a document by its relative path
59
+ #
60
+ # @param :site [Jekyll::Site]
61
+ # @param :relative_path [String]
62
+ # @return [Jekyll::Document,Nil]
63
+ def find(site:, relative_path:)
64
+ indexed_documents_by_relative_path(site)[relative_path]
65
+ end
66
+
67
+ # Index documents by relative path for faster finding
68
+ #
69
+ # @param [Jekyll::Site]
70
+ # @return [Hash]
71
+ def indexed_documents_by_relative_path(site)
72
+ @indexed_documents_by_relative_path ||= site.documents.reduce({}) do |idx, doc|
73
+ idx.tap do |i|
74
+ i[doc.relative_path] = doc
75
+ end
76
+ end
77
+ end
78
+ end
35
79
  end
36
80
  end
37
81
  end
@@ -43,7 +43,9 @@ module SuttyMigration
43
43
  pfx = "#{prefix}#{blog[:blog_id]}_" if blog[:blog_id] > 1
44
44
  pfx ||= prefix
45
45
 
46
- [ blog[:blog_id], blog.merge(db: self.class.new(site: site, url: url, prefix: pfx, database: database, limit: limit, multisite: self)) ]
46
+ [blog[:blog_id],
47
+ blog.merge(db: self.class.new(site: site, url: url, prefix: pfx, database: database, limit: limit,
48
+ multisite: self))]
47
49
  end.to_h
48
50
  end
49
51
 
@@ -94,11 +96,11 @@ module SuttyMigration
94
96
  end
95
97
 
96
98
  Faraday.get(url) do |req|
97
- req.options.on_data = Proc.new do |chunk, downloaded_bytes|
99
+ req.options.on_data = proc do |chunk, downloaded_bytes|
98
100
  f.write chunk
99
101
 
100
102
  if progress
101
- progress.progress = (downloaded_bytes > content_length) ? content_length : downloaded_bytes
103
+ progress.progress = downloaded_bytes > content_length ? content_length : downloaded_bytes
102
104
  end
103
105
  end
104
106
  end
@@ -130,10 +132,31 @@ module SuttyMigration
130
132
  p.map do |post|
131
133
  # Sequel parses dates on localtime
132
134
  post[:date] = ::Jekyll::Utils.parse_date(post[:date]) unless post[:date].blank?
133
- post[:last_modified_at] = ::Jekyll::Utils.parse_date(post[:last_modified_at]) unless post[:last_modified_at].blank?
135
+ unless post[:last_modified_at].blank?
136
+ post[:last_modified_at] =
137
+ ::Jekyll::Utils.parse_date(post[:last_modified_at])
138
+ end
134
139
 
135
- post[:front_matter] = JSON.parse(post[:front_matter]).transform_keys(&:to_sym).transform_values { |v| v.size == 1 ? v.first : v } unless post[:front_matter].blank?
136
- post[:terms] = JSON.parse(post[:terms]).transform_keys(&:to_sym) unless post[:terms].blank?
140
+ post[:front_matter] =
141
+ begin
142
+ unless post[:front_matter].blank?
143
+ JSON.parse(post[:front_matter]).transform_keys(&:to_sym).transform_values do |v|
144
+ v.size == 1 ? v.first : v
145
+ end
146
+ end
147
+ rescue JSON::ParserError
148
+ {}
149
+ end
150
+ post[:terms] =
151
+ begin
152
+ unless post[:terms].blank?
153
+ JSON.parse(post[:terms]).transform_keys(&:to_sym).transform_values do |v|
154
+ v.size == 1 ? v.first : v
155
+ end
156
+ end
157
+ rescue JSON::ParserError
158
+ {}
159
+ end
137
160
  end
138
161
  end
139
162
  end
@@ -167,7 +190,7 @@ module SuttyMigration
167
190
  <<~EOQ
168
191
  select
169
192
  u.*
170
- #{", json_group_object(m.meta_key, m.meta_value) as meta" if with_meta}
193
+ #{', json_group_object(m.meta_key, m.meta_value) as meta' if with_meta}
171
194
  from #{pfx}users as u
172
195
  #{"left join #{pfx}usermeta as m on m.user_id = u.id" if with_meta}
173
196
  group by u.id
@@ -203,11 +226,11 @@ module SuttyMigration
203
226
  p.menu_order as menu_order,
204
227
  p.post_mime_type as mime_type,
205
228
  p.comment_count as comment_count
206
- #{", f.front_matter as front_matter" if with_meta}
207
- #{", t.terms as terms" if with_meta}
229
+ #{', f.front_matter as front_matter' if with_meta}
230
+ #{', t.terms as terms' if with_meta}
208
231
  from #{prefix}posts as p
209
232
  #{"left join (#{meta_query}) as f on f.post_id = p.ID" if with_meta}
210
- #{"left join (#{terms_query(layout: layout)}) as t on t.id = p.ID" if with_meta}
233
+ #{"left join (#{terms_query}) as t on t.post_id = p.ID" if with_meta}
211
234
  #{"where p.post_type = '#{layout}'" if layout}
212
235
  group by p.ID
213
236
  EOQ
@@ -238,17 +261,21 @@ module SuttyMigration
238
261
  #
239
262
  # Takes no parameters; the layout filter is applied by the posts query that joins this one.
240
263
  # @return [String]
241
- def terms_query(layout: nil)
264
+ def terms_query
242
265
  <<~EOQ
243
266
  select
244
- p.ID as id,
245
- json_group_object(tt.taxonomy, t.name) as terms
246
- from #{prefix}posts as p
247
- left join #{prefix}term_relationships as r on r.object_id = p.ID
248
- left join #{prefix}term_taxonomy as tt on tt.term_taxonomy_id = r.term_taxonomy_id
249
- left join #{prefix}terms as t on t.term_id = tt.term_id
250
- #{"where p.post_type = '#{layout}'" if layout}
251
- group by p.ID
267
+ post_id,
268
+ json_group_object(taxonomy, json(terms)) as terms
269
+ from (
270
+ select
271
+ r.object_id as post_id,
272
+ tt.taxonomy,
273
+ json_group_array(t.name) as terms
274
+ from #{prefix}term_relationships as r
275
+ left join #{prefix}term_taxonomy as tt on tt.term_taxonomy_id = r.term_taxonomy_id
276
+ left join #{prefix}terms as t on t.term_id = tt.term_id
277
+ group by r.object_id, tt.taxonomy)
278
+ group by post_id
252
279
  EOQ
253
280
  end
254
281
  end
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require_relative 'wordpress_xml/post'
5
+ require_relative 'wordpress_xml/attachment'
6
+
7
+ module SuttyMigration
8
+ # Understands the XML dump generated by Wordpress and creates
9
+ # Jekyll::Documents
10
+ class WordpressXml
11
+ attr_reader :site, :file, :xml
12
+
13
+ # @param :site [Jekyll::Site] Jekyll site
14
+ # @param :file [String] File path
15
+ def initialize(site:, file:)
16
+ @site = site
17
+ @file = file
18
+ @xml = Nokogiri::XML File.read(file)
19
+
20
+ # Make things easier by removing namespaces.
21
+ xml.remove_namespaces!
22
+ end
23
+
24
+ def inspect
25
+ '#<SuttyMigration::WordpressXml>'
26
+ end
27
+
28
+ # Site URL
29
+ #
30
+ # @return [String]
31
+ def url
32
+ @url ||= attribute_value(xml, 'channel > link')
33
+ end
34
+
35
+ # Site title
36
+ #
37
+ # @return [String]
38
+ def title
39
+ @title ||= attribute_value(xml, 'channel > title')
40
+ end
41
+
42
+ # Description
43
+ #
44
+ # @return [String]
45
+ def description
46
+ @description ||= attribute_value(xml, 'channel > description')
47
+ end
48
+
49
+ # Language
50
+ #
51
+ # TODO: Migrate multilanguage sites.
52
+ #
53
+ # @return [String]
54
+ def language
55
+ @language ||= attribute_value(xml, 'channel > language')
56
+ end
57
+
58
+ # Authors with attributes, indexed by author email.
59
+ #
60
+ # @return [Hash]
61
+ def authors
62
+ @authors ||= xml.css('channel > author').map do |author|
63
+ {
64
+ attribute_value(author, 'author_email') => {
65
+ id: attribute_value(author, 'author_id').to_i,
66
+ display_name: attribute_value(author, 'author_display_name'),
67
+ first_name: attribute_value(author, 'author_first_name'),
68
+ last_name: attribute_value(author, 'author_last_name'),
69
+ email: attribute_value(author, 'author_email')
70
+
71
+ }
72
+ }
73
+ end.reduce(&:merge)
74
+ end
75
+
76
+ # Categories with attributes, indexed by slug ("nicename")
77
+ #
78
+ # @return [Hash]
79
+ def categories
80
+ @categories ||= xml.css('channel > category').map do |category|
81
+ {
82
+ attribute_value(category, 'category_nicename') => {
83
+ id: attribute_value(category, 'term_id').to_i,
84
+ title: attribute_value(category, 'cat_name'),
85
+ parent: attribute_value(category, 'category_parent'),
86
+ slug: attribute_value(category, 'category_nicename')
87
+ }
88
+ }
89
+ end.reduce(&:merge)
90
+ end
91
+
92
+ # Tags with attributes, indexed by slug
93
+ #
94
+ # @return [Hash]
95
+ def tags
96
+ @tags ||= xml.css('channel > tag').map do |tag|
97
+ {
98
+ attribute_value(tag, 'tag_slug') => {
99
+ id: attribute_value(tag, 'term_id').to_i,
100
+ title: attribute_value(tag, 'tag_name'),
101
+ slug: attribute_value(tag, 'tag_slug')
102
+ }
103
+ }
104
+ end.reduce(&:merge)
105
+ end
106
+
107
+ # Posts, indexed by ID
108
+ #
109
+ # @return [Hash]
110
+ def posts
111
+ @posts ||= items_find_by('post_type', 'post').map do |post|
112
+ { attribute_value(post, 'post_id').to_i => Post.new(wordpress: self, item: post) }
113
+ end.reduce(&:merge)
114
+ end
115
+
116
+ # Pages, indexed by ID
117
+ #
118
+ # @return [Hash]
119
+ def pages
120
+ @pages ||= items_find_by('post_type', 'page').map do |page|
121
+ { attribute_value(page, 'post_id').to_i => Post.new(wordpress: self, item: page) }
122
+ end.reduce(&:merge)
123
+ end
124
+
125
+ # Attachments, indexed by ID
126
+ #
127
+ # @return [Hash]
128
+ def attachments
129
+ @attachments ||= items_find_by('post_type', 'attachment').map do |attachment|
130
+ { attribute_value(attachment, 'post_id').to_i => Attachment.new(wordpress: self, item: attachment) }
131
+ end.reduce(&:merge)
132
+ end
133
+
134
+ # Find items by attribute and value
135
+ #
136
+ # @param [String] Attribute name
137
+ # @param [String] Attribute value
138
+ # @return [Array<Nokogiri::XML::Element>]
139
+ def items_find_by(attribute, value)
140
+ xml.css('channel > item').select do |item|
141
+ attribute_value(item, attribute) == value
142
+ end
143
+ end
144
+
145
+ # Get the text of the first node inside the element that matches a CSS selector
146
+ #
147
+ # @param [Nokogiri::XML::Element]
148
+ # @param [String]
149
+ # @return [String]
150
+ def attribute_value(element, attribute)
151
+ element.at_css(attribute).text
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'post'
4
+ require 'php-serialize'
5
+ require 'faraday'
6
+ require 'progressbar'
7
+
8
+ module SuttyMigration
9
+ class WordpressXml
10
+ # Represents an attachment or uploaded file.
11
+ class Attachment < Post
12
+ # File URL
13
+ #
14
+ # @return [String]
15
+ def attachment_url
16
+ @attachment_url ||= attribute_value 'attachment_url'
17
+ end
18
+
19
+ # File destination
20
+ #
21
+ # @return [String]
22
+ def dest
23
+ @dest ||= URI(attachment_url).path.sub(%r{\A/}, '')
24
+ end
25
+
26
+ # Metadata, with file information as a Hash
27
+ #
28
+ # @return [Hash]
29
+ def meta
30
+ super.tap do |m|
31
+ m['_wp_attachment_metadata'] = PHP.unserialize m['_wp_attachment_metadata']
32
+ end
33
+ end
34
+
35
+ # Download the file if it doesn't exist. Optionally show a
36
+ # progress bar.
37
+ #
38
+ # @param :progress [Boolean]
39
+ # @return [Boolean]
40
+ def download(progress: true)
41
+ return true if File.exist? dest
42
+
43
+ ::Jekyll.logger.info "Downloading #{dest}"
44
+
45
+ FileUtils.mkdir_p File.dirname(dest)
46
+
47
+ File.open(dest, 'w') do |f|
48
+ if progress
49
+ head = Faraday.head(attachment_url)
50
+ content_length = head.headers['content-length'].to_i
51
+ progress = ProgressBar.create(title: File.basename(dest), total: content_length, output: $stderr)
52
+ end
53
+
54
+ Faraday.get(attachment_url) do |req|
55
+ req.options.on_data = proc do |chunk, downloaded_bytes|
56
+ f.write chunk
57
+
58
+ if progress
59
+ progress.progress = downloaded_bytes > content_length ? content_length : downloaded_bytes
60
+ end
61
+ end
62
+ end
63
+ end
64
+
65
+ File.exist? dest
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wordpress_formatting/wpautop'
4
+ require 'jekyll/utils'
5
+
6
+ module SuttyMigration
7
+ class WordpressXml
8
+ # Represents a WordPress post
9
+ class Post
10
+ attr_reader :wordpress, :item
11
+
12
+ # @param :wordpress [SuttyMigration::WordpressXml]
13
+ # @param :item [Nokogiri::XML::Element]
14
+ def initialize(wordpress:, item:)
15
+ @wordpress = wordpress
16
+ @item = item
17
+ end
18
+
19
+ def inspect
20
+ "#<SuttyMigration::WordpressXml::Post title=\"#{title}\">"
21
+ end
22
+
23
+ # Post ID
24
+ #
25
+ # @return [Integer]
26
+ def id
27
+ @id ||= attribute_value('post_id').to_i
28
+ end
29
+
30
+ # Permalink. Path to the post, with the site URL removed.
31
+ #
32
+ # @return [String]
33
+ def permalink
34
+ @permalink ||= attribute_value('link').sub(wordpress.url, '')
35
+ end
36
+
37
+ # Title
38
+ #
39
+ # @return [String]
40
+ def title
41
+ @title ||= attribute_value('title')
42
+ end
43
+
44
+ # Description
45
+ #
46
+ # @return [String]
47
+ def description
48
+ @description ||= attribute_value('description')
49
+ end
50
+
51
+ # Slug ("post name")
52
+ #
53
+ # @return [String]
54
+ def slug
55
+ @slug ||= attribute_value('post_name')
56
+ end
57
+
58
+ # Publication date.
59
+ #
60
+ # WordPress can store this date in three different fields and
61
+ # sometimes they come empty or invalid.
62
+ #
63
+ # @return [Time]
64
+ def date
65
+ @date ||= %w[pubDate post_date_gmt post_date].map do |date_attr|
66
+ ::Jekyll::Utils.parse_date attribute_value(date_attr)
67
+ rescue StandardError
68
+ end.compact.first
69
+ end
70
+
71
+ # Modification date.
72
+ #
73
+ # @return [Time]
74
+ def last_modified_at
75
+ @last_modified_at ||= ::Jekyll::Utils.parse_date attribute_value('post_modified_gmt')
76
+ end
77
+
78
+ # Content as HTML, with site URL removed.
79
+ #
80
+ # @return [String]
81
+ def content
82
+ @content ||= WordpressFormatting::Wpautop.wpautop(attribute_value('encoded')).gsub(
83
+ / (href|src)="#{wordpress.url}/, ' \\1="'
84
+ )
85
+ end
86
+
87
+ # Author attributes.
88
+ #
89
+ # @return [Hash]
90
+ def author
91
+ @author ||= wordpress.authors[attribute_value('creator')]
92
+ end
93
+
94
+ # Post password. Use with jekyll-crypto.
95
+ #
96
+ # @return [String]
97
+ def password
98
+ @password ||= attribute_value 'post_password'
99
+ end
100
+
101
+ # Tags with attributes.
102
+ #
103
+ # @return [Array<Hash>]
104
+ def tags
105
+ @tags ||= item.css('category').select do |c|
106
+ c[:domain] == 'post_tag'
107
+ end.map do |c|
108
+ wordpress.tags[c[:nicename]]
109
+ end
110
+ end
111
+
112
+ # Categories with attributes.
113
+ #
114
+ # @return [Array<Hash>]
115
+ def categories
116
+ @categories ||= item.css('category').select do |c|
117
+ c[:domain] == 'category'
118
+ end.map do |c|
119
+ wordpress.categories[c[:nicename]]
120
+ end
121
+ end
122
+
123
+ # Metadata. Plugins store useful information here. Duplicated
124
+ # keys are returned as an Array of values.
125
+ #
126
+ # @return [Hash]
127
+ def meta
128
+ @meta ||= {}.tap do |meta|
129
+ item.css('postmeta').each do |m|
130
+ key = m.css('meta_key').text
131
+ value = m.css('meta_value').text
132
+
133
+ case meta[key]
134
+ when nil then meta[key] = value
135
+ when String then meta[key] = [meta[key], value]
136
+ when Array then meta[key] << value
137
+ end
138
+ end
139
+ end
140
+ end
141
+
142
+ # Order. Higher are sorted on top by jekyll-order.
143
+ #
144
+ # @return [Integer]
145
+ def order
146
+ @order ||= attribute_value 'is_sticky'
147
+ end
148
+
149
+ # Publication status
150
+ #
151
+ # @return [Boolean]
152
+ def published?
153
+ @published ||= attribute_value('status') == 'publish'
154
+ end
155
+
156
+ # Publication status
157
+ #
158
+ # @return [Boolean]
159
+ def draft?
160
+ @draft ||= attribute_value('status') == 'draft'
161
+ end
162
+
163
+ # Get the text of the first node inside the item that matches a CSS selector
164
+ #
165
+ # @return [String]
166
+ def attribute_value(key)
167
+ item.at_css(key).text
168
+ end
169
+ end
170
+ end
171
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sutty-migration
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - f
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-10 00:00:00.000000000 Z
11
+ date: 2021-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jekyll
@@ -108,6 +108,48 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: '5.45'
111
+ - !ruby/object:Gem::Dependency
112
+ name: wordpress-formatting
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 0.1.0
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 0.1.0
125
+ - !ruby/object:Gem::Dependency
126
+ name: nokogiri
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 1.12.0
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 1.12.0
139
+ - !ruby/object:Gem::Dependency
140
+ name: php-serialize
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 1.3.0
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 1.3.0
111
153
  - !ruby/object:Gem::Dependency
112
154
  name: pry
113
155
  requirement: !ruby/object:Gem::Requirement
@@ -138,6 +180,9 @@ files:
138
180
  - lib/sutty_migration/data.rb
139
181
  - lib/sutty_migration/jekyll/document_creator.rb
140
182
  - lib/sutty_migration/wordpress.rb
183
+ - lib/sutty_migration/wordpress_xml.rb
184
+ - lib/sutty_migration/wordpress_xml/attachment.rb
185
+ - lib/sutty_migration/wordpress_xml/post.rb
141
186
  homepage: https://0xacab.org/sutty/jekyll/sutty-migration
142
187
  licenses:
143
188
  - GPL-3.0