sutty-migration 0.2.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f5a175d5ab6dd98eb4bed7ac987961ce9aaae15355c3eeb8e191805a6192ae34
4
- data.tar.gz: f677b5f7945ba06403239197798d1b37eaea975c2057734c16484a0c75204f48
3
+ metadata.gz: 0dc6ffbe6f1f29803b5690efe018a561dab4e95a85dbfe395da76c6b0a63244f
4
+ data.tar.gz: ae4a2582cfd7742dbb870bd06da9088987e6b65328c40933a507d35ef06e8a56
5
5
  SHA512:
6
- metadata.gz: 9cb1bf01c37e40036dffdd19b14b78fbdf538af45625867eb6cbd45ab66bcb468a91b11a13ab86ca83e2d8692235ed0c6d4d919186d3a8713f9c81723c7978a6
7
- data.tar.gz: a3fc41c535e1a028526beb8904be1a72b2eee49473e4036059e7b0d4bbeff9b8670bf20b564636709605200aeede0c928bf9c9613146339a01803d396c570c05
6
+ metadata.gz: cd672998fd7f86b7b1f5c3992ce4d0db017d2eeacf8ca92cccbe0620016bd6bea183a933641083791b7abf14cf4a5d934d4e50589eae9d62b334ef8a403bf629
7
+ data.tar.gz: 6b51a2e800cb0a707cd0a36bda70a4d8aad07a33a63c0e22899be96c1e7a9e524227a735f37282d7066ef46cba2455e948cfc9346f5155df45235bf3c27852fc
data/README.md CHANGED
@@ -23,6 +23,7 @@ Add the plugin to your `_config.yml`:
23
23
  ```yaml
24
24
  plugins:
25
25
  - sutty-migration
26
+ array_separator: ','
26
27
  ```
27
28
 
28
29
  Compile a CSV file with the following required fields:
@@ -114,6 +115,55 @@ Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
114
115
  end
115
116
  ```
116
117
 
118
+ ### WordPress XML
119
+
120
+ If you have the XML dump from a WordPress site, you can migrate content
121
+ by writing a migration plugin.
122
+
123
+ ```ruby
124
+ # frozen_string_literal: true
125
+
126
+ require 'sutty_migration/jekyll/document_creator'
127
+ require 'sutty_migration/wordpress_xml'
128
+ require 'jekyll-write-and-commit-changes'
129
+ require 'securerandom'
130
+
131
+ # Run after reading the site
132
+ Jekyll::Hooks.register :site, :post_read do |site|
133
+ # Put the XML dump at _files/wordpress.xml
134
+ xml = SuttyMigration::WordpressXml.new site: site, file: '_files/wordpress.xml'
135
+
136
+ # Download all files
137
+ xml.attachments.values.map(&:download)
138
+
139
+ # Migrate posts. You can move metadata around and recover
140
+ # relationships or any info your theme requires.
141
+ xml.posts.values.each do |post|
142
+ # Update documents already migrated.
143
+ doc = Jekyll::Document.find_or_create(site: site, collection: locale, title: post.title, slug: post.slug, date: post.date)
144
+ # Don't change the UUIDv4
145
+ doc.data['uuid'] ||= SecureRandom.uuid
146
+ doc.data['draft'] = post.draft?
147
+ doc.data['layout'] = 'post'
148
+ doc.data['last_modified_at'] = post.last_modified_at
149
+
150
+ doc.data['categories'] = post.categories.map { |c| c[:title] }
151
+ doc.data['tags'] = post.tags.map { |t| t[:title] }
152
+
153
+ doc.data['author'] = post.author[:email]
154
+ doc.data['description'] = post.description
155
+ doc.content = post.content
156
+
157
+ doc.save
158
+ rescue => e
159
+ Jekyll.logger.warn "Couldn't migrate #{post.title}"
160
+ end
161
+
162
+ exit # Stop here
163
+ end
164
+ ```
165
+
166
+
117
167
  ## Contributing
118
168
 
119
169
  Bug reports and pull requests are welcome on 0xacab.org at
@@ -13,6 +13,8 @@ require_relative 'jekyll/document_creator'
13
13
  Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
14
14
  documents = site.documents
15
15
 
16
+ array_separator = site.config.fetch('array_separator', ',')
17
+
16
18
  site.data['layouts']&.each do |name, layout|
17
19
  site.data.dig('migration', name)&.each do |row|
18
20
  row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
@@ -24,41 +26,47 @@ Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
24
26
  end
25
27
  end
26
28
 
27
- document ||= Jekyll::Document.create(site: site, collection: 'posts', **row.slice(*%w[date slug title]).transform_keys(&:to_sym))
29
+ document ||= begin
30
+ data = row.slice(*%w[date slug title]).transform_keys(&:to_sym)
31
+ Jekyll::Document.find_or_create(site: site, collection: 'posts', **data)
32
+ end
33
+ next unless document
28
34
 
29
35
  row.each do |attribute, value|
30
- next unless value.blank?
36
+ next if value.nil? || value.blank?
37
+
38
+ value.strip! if value.is_a? String
31
39
 
32
40
  row[attribute] =
33
41
  case layout.dig(attribute, 'type')
34
- when 'string' then value
35
- when 'text' then value
36
- when 'tel' then value
37
- # TODO: validate
38
- when 'color' then value
39
- when 'date' then Jekyll::Utils.parse_date(value)
40
- # TODO: validate
41
- when 'email' then value
42
- # TODO: validate
43
- when 'url' then value
44
- when 'content' then value
45
- when 'markdown_content' then value
46
- when 'markdown' then value
47
- when 'number' then value.to_i
48
- when 'order' then value.to_i
49
- when 'boolean' then !value.strip.empty?
50
- when 'array' then value.split(',').map(&:strip)
51
- # TODO: process values from the default array
52
- when 'predefined_array' then value.split(',').map(&:strip)
53
- when 'image' then { 'path' => value, 'description' => '' }
54
- when 'file' then { 'path' => value, 'description' => '' }
55
- when 'geo' then %w[lat lng].zip(value.split(',', 2).map(&:to_f)).to_h
56
- when 'belongs_to' then value
57
- when 'has_many' then value.split(',').map(&:strip)
58
- when 'has_and_belongs_to_many' then value.split(',').map(&:strip)
59
- when 'related_posts' then value.split(',').map(&:strip)
60
- when 'locales' then value.split(',').map(&:strip)
61
- else value
42
+ when 'string' then value.tr("\n", ' ').squeeze(' ')
43
+ when 'text' then value.gsub("\n", "\n\n")
44
+ when 'tel' then value.tr("\n", ' ').squeeze(' ')
45
+ # TODO: validate
46
+ when 'color' then value.tr("\n", ' ').squeeze(' ')
47
+ when 'date' then Jekyll::Utils.parse_date(value)
48
+ # TODO: validate
49
+ when 'email' then value.tr("\n", ' ').squeeze(' ')
50
+ # TODO: validate
51
+ when 'url' then value.tr("\n", ' ').squeeze(' ')
52
+ when 'content' then value.gsub("\n", "\n\n")
53
+ when 'markdown_content' then value.gsub("\n", "\n\n")
54
+ when 'markdown' then value.gsub("\n", "\n\n")
55
+ when 'number' then value.to_i
56
+ when 'order' then value.to_i
57
+ when 'boolean' then !value.strip.empty?
58
+ when 'array' then value.split(array_separator).map(&:strip)
59
+ # TODO: process values from the default array
60
+ when 'predefined_array' then value.split(array_separator).map(&:strip)
61
+ when 'image' then { 'path' => value, 'description' => '' }
62
+ when 'file' then { 'path' => value, 'description' => '' }
63
+ when 'geo' then %w[lat lng].zip(value.split(array_separator, 2).map(&:to_f)).to_h
64
+ when 'belongs_to' then value
65
+ when 'has_many' then value.split(array_separator).map(&:strip)
66
+ when 'has_and_belongs_to_many' then value.split(array_separator).map(&:strip)
67
+ when 'related_posts' then value.split(array_separator).map(&:strip)
68
+ when 'locales' then value.split(array_separator).map(&:strip)
69
+ else value
62
70
  end
63
71
  end
64
72
 
@@ -7,31 +7,76 @@ module SuttyMigration
7
7
  module Jekyll
8
8
  module DocumentCreator
9
9
  class DocumentExists < ArgumentError; end
10
+
10
11
  def self.included(base)
11
12
  base.class_eval do
13
+ class << self
14
+ # Creates a new document in a collection or fails if it already
15
+ # exists.
16
+ #
17
+ # @param :site [Jekyll::Site] Jekyll site
18
+ # @param :date [Time] Post date
19
+ # @param :title [String] Post title
20
+ # @param :slug [String] Post slug, slugified title if empty
21
+ # @param :collection [Jekyll::Collection,String] Collection label or collection
22
+ # @return [Jekyll::Document] A new document
23
+ def create(site:, date:, title:, collection:, slug: nil)
24
+ collection = site.collections[collection] if collection.is_a? String
25
+ slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
26
+ basename = "#{date.strftime('%F')}-#{slug}.markdown"
27
+ path = File.join(collection.relative_directory, basename)
12
28
 
13
- # Creates a new document in a collection or fails if it already
14
- # exists.
15
- #
16
- # @param :site [Jekyll::Site] Jekyll site
17
- # @param :date [Time] Post date
18
- # @param :title [String] Post title
19
- # @param :slug [String] Post slug, slugified title if empty
20
- # @param :collection [Jekyll::Collection,String] Collection label or collection
21
- # @return [Jekyll::Document] A new document
22
- def self.create(site:, date:, title:, slug: nil, collection:)
23
- collection = site.collections[collection] if collection.is_a? String
24
- slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
25
- basename = "#{date.strftime('%F')}-#{slug}.markdown"
26
- path = File.join(collection.directory, basename)
27
-
28
- raise DocumentExists, "#{path} already exists" if File.exist? path
29
-
30
- ::Jekyll::Document.new(path, site: site, collection: collection).tap do |document|
31
- collection.docs << document
32
- document.data['title'] = title
33
- end
34
- end
29
+ raise DocumentExists, "#{path} already exists" if File.exist? path
30
+
31
+ indexed_documents_by_relative_path(site)[path] =
32
+ ::Jekyll::Document.new(path, site: site, collection: collection).tap do |document|
33
+ collection.docs << document
34
+ document.data['title'] = title
35
+ end
36
+ end
37
+
38
+ # Finds a document by its relative path or creates it if it
39
+ # doesn't exist. Helpful for idempotent migrations (create or
40
+ # update actions)
41
+ #
42
+ # @param :site [Jekyll::Site] Jekyll site
43
+ # @param :date [Time] Post date
44
+ # @param :title [String] Post title
45
+ # @param :slug [String] Post slug, slugified title if empty
46
+ # @param :collection [Jekyll::Collection,String] Collection label or collection
47
+ # @return [Jekyll::Document] The found document or a new one
48
+ def find_or_create(site:, date:, title:, collection:, slug: nil)
49
+ collection = site.collections[collection] if collection.is_a? String
50
+ slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
51
+ basename = "#{date.strftime('%F')}-#{slug}.markdown"
52
+ path = File.join(collection.relative_directory, basename)
53
+
54
+ return find(site: site, relative_path: path) if File.exist?(path)
55
+
56
+ create(site: site, date: date, title: title, slug: slug, collection: collection)
57
+ end
58
+
59
+ # Finds a document by its relative path
60
+ #
61
+ # @param :site [Jekyll::Site]
62
+ # @param :relative_path [String]
63
+ # @return [Jekyll::Document,Nil]
64
+ def find(site:, relative_path:)
65
+ indexed_documents_by_relative_path(site)[relative_path]
66
+ end
67
+
68
+ # Index documents by relative path for faster finding
69
+ #
70
+ # @param [Jekyll::Site]
71
+ # @return [Hash]
72
+ def indexed_documents_by_relative_path(site)
73
+ @indexed_documents_by_relative_path ||= site.documents.reduce({}) do |idx, doc|
74
+ idx.tap do |i|
75
+ i[doc.relative_path] = doc
76
+ end
77
+ end
78
+ end
79
+ end
35
80
  end
36
81
  end
37
82
  end
@@ -43,7 +43,9 @@ module SuttyMigration
43
43
  pfx = "#{prefix}#{blog[:blog_id]}_" if blog[:blog_id] > 1
44
44
  pfx ||= prefix
45
45
 
46
- [ blog[:blog_id], blog.merge(db: self.class.new(site: site, url: url, prefix: pfx, database: database, limit: limit, multisite: self)) ]
46
+ [blog[:blog_id],
47
+ blog.merge(db: self.class.new(site: site, url: url, prefix: pfx, database: database, limit: limit,
48
+ multisite: self))]
47
49
  end.to_h
48
50
  end
49
51
 
@@ -94,11 +96,11 @@ module SuttyMigration
94
96
  end
95
97
 
96
98
  Faraday.get(url) do |req|
97
- req.options.on_data = Proc.new do |chunk, downloaded_bytes|
99
+ req.options.on_data = proc do |chunk, downloaded_bytes|
98
100
  f.write chunk
99
101
 
100
102
  if progress
101
- progress.progress = (downloaded_bytes > content_length) ? content_length : downloaded_bytes
103
+ progress.progress = downloaded_bytes > content_length ? content_length : downloaded_bytes
102
104
  end
103
105
  end
104
106
  end
@@ -130,10 +132,31 @@ module SuttyMigration
130
132
  p.map do |post|
131
133
  # Sequel parses dates on localtime
132
134
  post[:date] = ::Jekyll::Utils.parse_date(post[:date]) unless post[:date].blank?
133
- post[:last_modified_at] = ::Jekyll::Utils.parse_date(post[:last_modified_at]) unless post[:last_modified_at].blank?
135
+ unless post[:last_modified_at].blank?
136
+ post[:last_modified_at] =
137
+ ::Jekyll::Utils.parse_date(post[:last_modified_at])
138
+ end
134
139
 
135
- post[:front_matter] = JSON.parse(post[:front_matter]).transform_keys(&:to_sym).transform_values { |v| v.size == 1 ? v.first : v } unless post[:front_matter].blank?
136
- post[:terms] = JSON.parse(post[:terms]).transform_keys(&:to_sym) unless post[:terms].blank?
140
+ post[:front_matter] =
141
+ begin
142
+ unless post[:front_matter].blank?
143
+ JSON.parse(post[:front_matter]).transform_keys(&:to_sym).transform_values do |v|
144
+ v.size == 1 ? v.first : v
145
+ end
146
+ end
147
+ rescue JSON::ParserError
148
+ {}
149
+ end
150
+ post[:terms] =
151
+ begin
152
+ unless post[:terms].blank?
153
+ JSON.parse(post[:terms]).transform_keys(&:to_sym).transform_values do |v|
154
+ v.size == 1 ? v.first : v
155
+ end
156
+ end
157
+ rescue JSON::ParserError
158
+ {}
159
+ end
137
160
  end
138
161
  end
139
162
  end
@@ -167,7 +190,7 @@ module SuttyMigration
167
190
  <<~EOQ
168
191
  select
169
192
  u.*
170
- #{", json_group_object(m.meta_key, m.meta_value) as meta" if with_meta}
193
+ #{', json_group_object(m.meta_key, m.meta_value) as meta' if with_meta}
171
194
  from #{pfx}users as u
172
195
  #{"left join #{pfx}usermeta as m on m.user_id = u.id" if with_meta}
173
196
  group by u.id
@@ -203,11 +226,11 @@ module SuttyMigration
203
226
  p.menu_order as menu_order,
204
227
  p.post_mime_type as mime_type,
205
228
  p.comment_count as comment_count
206
- #{", f.front_matter as front_matter" if with_meta}
207
- #{", t.terms as terms" if with_meta}
229
+ #{', f.front_matter as front_matter' if with_meta}
230
+ #{', t.terms as terms' if with_meta}
208
231
  from #{prefix}posts as p
209
- #{"left join (#{meta_query}) as f on f.post_id = p.ID" if with_meta}
210
- #{"left join (#{terms_query(layout: layout)}) as t on t.id = p.ID" if with_meta}
232
+ #{"left join (#{meta_query(layout: layout)}) as f on f.post_id = p.ID" if with_meta}
233
+ #{"left join (#{terms_query(layout: layout)}) as t on t.post_id = p.ID" if with_meta}
211
234
  #{"where p.post_type = '#{layout}'" if layout}
212
235
  group by p.ID
213
236
  EOQ
@@ -217,7 +240,7 @@ module SuttyMigration
217
240
  # converted to arrays
218
241
  #
219
242
  # @return [String]
220
- def meta_query
243
+ def meta_query(layout: nil)
221
244
  <<~EOQ
222
245
  select
223
246
  post_id,
@@ -230,6 +253,7 @@ module SuttyMigration
230
253
  from #{prefix}postmeta
231
254
  group by post_id, meta_key
232
255
  )
256
+ #{"where post_id in (select ID from #{prefix}posts where post_type = '#{layout}')" if layout}
233
257
  group by post_id
234
258
  EOQ
235
259
  end
@@ -241,14 +265,19 @@ module SuttyMigration
241
265
  def terms_query(layout: nil)
242
266
  <<~EOQ
243
267
  select
244
- p.ID as id,
245
- json_group_object(tt.taxonomy, t.name) as terms
246
- from #{prefix}posts as p
247
- left join #{prefix}term_relationships as r on r.object_id = p.ID
248
- left join #{prefix}term_taxonomy as tt on tt.term_taxonomy_id = r.term_taxonomy_id
249
- left join #{prefix}terms as t on t.term_id = tt.term_id
250
- #{"where p.post_type = '#{layout}'" if layout}
251
- group by p.ID
268
+ post_id,
269
+ json_group_object(taxonomy, json(terms)) as terms
270
+ from (
271
+ select
272
+ r.object_id as post_id,
273
+ tt.taxonomy,
274
+ json_group_array(t.name) as terms
275
+ from #{prefix}term_relationships as r
276
+ left join #{prefix}term_taxonomy as tt on tt.term_taxonomy_id = r.term_taxonomy_id
277
+ left join #{prefix}terms as t on t.term_id = tt.term_id
278
+ #{"where r.object_id in (select ID from #{prefix}posts where post_type = '#{layout}')" if layout}
279
+ group by r.object_id, tt.taxonomy)
280
+ group by post_id
252
281
  EOQ
253
282
  end
254
283
  end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'post'
4
+ require 'php-serialize'
5
+ require 'faraday'
6
+ require 'progressbar'
7
+
8
module SuttyMigration
  class WordpressXml
    # Represents an attachment or uploaded file.
    class Attachment < Post
      # Meta key whose value is unserialized with PHP.unserialize by #meta.
      METADATA_KEY = '_wp_attachment_metadata'

      # File URL
      #
      # @return [String]
      def attachment_url
        @attachment_url ||= attribute_value 'attachment_url'
      end

      # File destination: the path component of the attachment URL with
      # its leading slash removed, so it resolves relative to the current
      # working directory.
      #
      # NOTE(review): the path comes straight from the XML dump; a URL
      # containing ".." segments would point outside the working
      # directory -- confirm dumps are trusted before downloading.
      #
      # @return [String]
      def dest
        @dest ||= URI(attachment_url).path.sub(%r{\A/}, '')
      end

      # Metadata, with file information as a Hash
      #
      # The Hash returned by Post#meta is memoized and modified in place,
      # so the serialized value is only converted while it is still a
      # String. This keeps repeated calls from unserializing an already
      # converted value, and skips attachments without the key.
      #
      # @return [Hash]
      def meta
        super.tap do |m|
          raw = m[METADATA_KEY]
          m[METADATA_KEY] = PHP.unserialize(raw) if raw.is_a?(String)
        end
      end

      # Download the file if it doesn't exist. Optionally show a
      # progress bar.
      #
      # A file left behind by a failed transfer is removed before the
      # error is re-raised, otherwise the existence check would skip it on
      # the next run.
      #
      # @param :progress [Boolean]
      # @return [Boolean]
      def download(progress: true)
        return true if File.exist? dest

        ::Jekyll.logger.info "Downloading #{dest}"

        FileUtils.mkdir_p File.dirname(dest)

        bar = nil
        content_length = 0

        # Ask for the size before creating the file, so a failing HEAD
        # request doesn't leave an empty file on disk.
        if progress
          head = Faraday.head(attachment_url)
          content_length = head.headers['content-length'].to_i
          bar = ProgressBar.create(title: File.basename(dest), total: content_length, output: $stderr)
        end

        begin
          # Binary mode: chunks are raw bytes and must be written as-is.
          File.open(dest, 'wb') do |f|
            Faraday.get(attachment_url) do |req|
              req.options.on_data = proc do |chunk, downloaded_bytes|
                f.write chunk

                # Never report more than the announced total.
                bar.progress = [downloaded_bytes, content_length].min if bar
              end
            end
          end
        rescue StandardError
          FileUtils.rm_f dest
          raise
        end

        File.exist? dest
      end
    end
  end
end
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'wordpress_formatting/wpautop'
4
+ require 'jekyll/utils'
5
+
6
module SuttyMigration
  class WordpressXml
    # Represents a WordPress post. Wraps a single item element of the XML
    # dump and exposes its child elements as memoized readers.
    class Post
      attr_reader :wordpress, :item

      # @param :wordpress [SuttyMigration::WordpressXml]
      # @param :item [Nokogiri::XML::Element]
      def initialize(wordpress:, item:)
        @wordpress = wordpress
        @item = item
      end

      def inspect
        "#<SuttyMigration::WordpressXml::Post title=\"#{title}\">"
      end

      # Post ID
      #
      # @return [Integer]
      def id
        @id ||= attribute_value('post_id').to_i
      end

      # Permalink. The post link with the first occurrence of the site
      # URL removed.
      #
      # @return [String]
      def permalink
        @permalink ||= attribute_value('link').sub(wordpress.url, '')
      end

      # Title
      #
      # @return [String]
      def title
        @title ||= attribute_value('title')
      end

      # Description
      #
      # @return [String]
      def description
        @description ||= attribute_value('description')
      end

      # Slug ("post name")
      #
      # @return [String]
      def slug
        @slug ||= attribute_value('post_name')
      end

      # Publication date.
      #
      # WordPress can store this date in three different fields and
      # sometimes they come empty or invalid. The first field that parses
      # wins; fields that fail to parse are ignored.
      #
      # @return [Time,nil] nil when none of the fields can be parsed
      def date
        @date ||= %w[pubDate post_date_gmt post_date].map do |date_attr|
          ::Jekyll::Utils.parse_date attribute_value(date_attr)
        rescue StandardError
          nil
        end.compact.first
      end

      # Modification date.
      #
      # @return [Time]
      def last_modified_at
        @last_modified_at ||= ::Jekyll::Utils.parse_date attribute_value('post_modified_gmt')
      end

      # Content as HTML, with site URL removed from href and src
      # attributes.
      #
      # The site URL is escaped before being interpolated so characters
      # such as "." only match themselves and look-alike hosts are left
      # untouched.
      #
      # @return [String]
      def content
        @content ||= WordpressFormatting::Wpautop.wpautop(attribute_value('encoded')).gsub(
          / (href|src)="#{Regexp.escape(wordpress.url)}/, ' \\1="'
        )
      end

      # Author attributes.
      #
      # NOTE(review): the lookup key is the item's "creator" value, while
      # WordpressXml#authors documents its index as author email --
      # confirm both hold the same value in real dumps.
      #
      # @return [Hash]
      def author
        @author ||= wordpress.authors[attribute_value('creator')]
      end

      # Post password. Use with jekyll-crypto.
      #
      # @return [String]
      def password
        @password ||= attribute_value 'post_password'
      end

      # Tags with attributes: one entry per category element whose domain
      # is "post_tag", looked up by nicename in the site-wide tags.
      #
      # @return [Array<Hash>]
      def tags
        @tags ||= item.css('category')
                      .select { |c| c[:domain] == 'post_tag' }
                      .map { |c| wordpress.tags[c[:nicename]] }
      end

      # Categories with attributes: one entry per category element whose
      # domain is "category", looked up by nicename in the site-wide
      # categories.
      #
      # @return [Array<Hash>]
      def categories
        @categories ||= item.css('category')
                            .select { |c| c[:domain] == 'category' }
                            .map { |c| wordpress.categories[c[:nicename]] }
      end

      # Metadata. Plugins store useful information here. Duplicated
      # keys are returned as an Array of values.
      #
      # @return [Hash]
      def meta
        @meta ||= {}.tap do |meta|
          item.css('postmeta').each do |m|
            key = m.css('meta_key').text
            value = m.css('meta_value').text

            case meta[key]
            when nil then meta[key] = value
            when String then meta[key] = [meta[key], value]
            when Array then meta[key] << value
            end
          end
        end
      end

      # Order. Higher are sorted on top by jekyll-order.
      #
      # @return [String] the "is_sticky" value as found in the dump; it is
      #   not converted to a number
      def order
        @order ||= attribute_value 'is_sticky'
      end

      # Publication status
      #
      # @return [Boolean] true when the status is "publish"
      def published?
        # `||=` would recompute every time the answer is false.
        return @published if defined?(@published)

        @published = attribute_value('status') == 'publish'
      end

      # Publication status
      #
      # @return [Boolean] true when the status is "draft"
      def draft?
        # `||=` would recompute every time the answer is false.
        return @draft if defined?(@draft)

        @draft = attribute_value('status') == 'draft'
      end

      # Get the text of the first element matching the key
      #
      # @param key [String] CSS selector, relative to the item
      # @return [String]
      def attribute_value(key)
        item.at_css(key).text
      end
    end
  end
end
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require_relative 'wordpress_xml/post'
5
+ require_relative 'wordpress_xml/attachment'
6
+
7
module SuttyMigration
  # Understands the XML dump generated by Wordpress and creates
  # Jekyll::Documents
  class WordpressXml
    attr_reader :site, :file, :xml

    # @param :site [Jekyll::Site] Jekyll site
    # @param :file [String] File path
    def initialize(site:, file:)
      @site = site
      @file = file
      @xml = Nokogiri::XML File.read(file)

      # Make things easier by removing namespaces.
      xml.remove_namespaces!
    end

    def inspect
      '#<SuttyMigration::WordpressXml>'
    end

    # Site URL
    #
    # @return [String]
    def url
      @url ||= attribute_value(xml, 'channel > link')
    end

    # Site title
    #
    # @return [String]
    def title
      @title ||= attribute_value(xml, 'channel > title')
    end

    # Description
    #
    # @return [String]
    def description
      @description ||= attribute_value(xml, 'channel > description')
    end

    # Language
    #
    # TODO: Migrate multilanguage sites.
    #
    # @return [String]
    def language
      @language ||= attribute_value(xml, 'channel > language')
    end

    # Authors with attributes, indexed by author email. Empty when the
    # dump has no authors.
    #
    # @return [Hash]
    def authors
      @authors ||= xml.css('channel > author').each_with_object({}) do |author, index|
        email = attribute_value(author, 'author_email')

        index[email] = {
          id: attribute_value(author, 'author_id').to_i,
          display_name: attribute_value(author, 'author_display_name'),
          first_name: attribute_value(author, 'author_first_name'),
          last_name: attribute_value(author, 'author_last_name'),
          email: email
        }
      end
    end

    # Categories with attributes, indexed by slug ("nicename"). Empty
    # when the dump has no categories.
    #
    # @return [Hash]
    def categories
      @categories ||= xml.css('channel > category').each_with_object({}) do |category, index|
        slug = attribute_value(category, 'category_nicename')

        index[slug] = {
          id: attribute_value(category, 'term_id').to_i,
          title: attribute_value(category, 'cat_name'),
          parent: attribute_value(category, 'category_parent'),
          slug: slug
        }
      end
    end

    # Tags with attributes, indexed by slug. Empty when the dump has no
    # tags.
    #
    # @return [Hash]
    def tags
      @tags ||= xml.css('channel > tag').each_with_object({}) do |tag, index|
        slug = attribute_value(tag, 'tag_slug')

        index[slug] = {
          id: attribute_value(tag, 'term_id').to_i,
          title: attribute_value(tag, 'tag_name'),
          slug: slug
        }
      end
    end

    # Posts, indexed by ID. Empty when the dump has no posts.
    #
    # @return [Hash]
    def posts
      @posts ||= index_items_by_id('post', Post)
    end

    # Pages, indexed by ID. Empty when the dump has no pages.
    #
    # @return [Hash]
    def pages
      @pages ||= index_items_by_id('page', Post)
    end

    # Attachments, indexed by ID. Empty when the dump has no
    # attachments.
    #
    # @return [Hash]
    def attachments
      @attachments ||= index_items_by_id('attachment', Attachment)
    end

    # Find items by attribute and value
    #
    # @param [String] Attribute name
    # @param [String] Attribute value
    # @return [Array] Matching item elements
    def items_find_by(attribute, value)
      xml.css('channel > item').select do |item|
        attribute_value(item, attribute) == value
      end
    end

    # Get the text of the first element matching the selector
    #
    # @param [Nokogiri::XML::Element]
    # @param [String]
    # @return [String]
    def attribute_value(element, attribute)
      element.at_css(attribute).text
    end

    private

    # Wraps every item of the given post type in the wrapper class and
    # indexes the results by post ID. Always returns a Hash, so callers
    # can chain #values or #each on dumps without items of that type.
    #
    # @param post_type [String] Value of the item's post_type
    # @param wrapper [Class] Post or a subclass of it
    # @return [Hash]
    def index_items_by_id(post_type, wrapper)
      items_find_by('post_type', post_type).each_with_object({}) do |item, index|
        index[attribute_value(item, 'post_id').to_i] = wrapper.new(wordpress: self, item: item)
      end
    end
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sutty-migration
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - f
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-10 00:00:00.000000000 Z
11
+ date: 2021-10-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jekyll
@@ -108,6 +108,48 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: '5.45'
111
+ - !ruby/object:Gem::Dependency
112
+ name: wordpress-formatting
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 0.1.0
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 0.1.0
125
+ - !ruby/object:Gem::Dependency
126
+ name: nokogiri
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '1.11'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '1.11'
139
+ - !ruby/object:Gem::Dependency
140
+ name: php-serialize
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 1.3.0
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 1.3.0
111
153
  - !ruby/object:Gem::Dependency
112
154
  name: pry
113
155
  requirement: !ruby/object:Gem::Requirement
@@ -138,6 +180,9 @@ files:
138
180
  - lib/sutty_migration/data.rb
139
181
  - lib/sutty_migration/jekyll/document_creator.rb
140
182
  - lib/sutty_migration/wordpress.rb
183
+ - lib/sutty_migration/wordpress_xml.rb
184
+ - lib/sutty_migration/wordpress_xml/attachment.rb
185
+ - lib/sutty_migration/wordpress_xml/post.rb
141
186
  homepage: https://0xacab.org/sutty/jekyll/sutty-migration
142
187
  licenses:
143
188
  - GPL-3.0