sutty-migration 0.1.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7eec5cd55957ccc99beafaa4f3263b038d6eaf04cc1f87c102ae29db10180655
4
- data.tar.gz: a8b8bf2c0ed35ace1523fe5439ad732699cc91508a832ef11e31e4e44efb536e
3
+ metadata.gz: f5a175d5ab6dd98eb4bed7ac987961ce9aaae15355c3eeb8e191805a6192ae34
4
+ data.tar.gz: f677b5f7945ba06403239197798d1b37eaea975c2057734c16484a0c75204f48
5
5
  SHA512:
6
- metadata.gz: a42330f844989f491e957c1718657f0a25eb9539f1b0ebeb6fe557983e9fc32910561d6a95b36beeac5da0be81dfff0a5ada4d0a4bc8e6235c2bc382563d3788
7
- data.tar.gz: 58c8de02083376b983a7da16950202503bf33b6d62365095ce33c29408b029f69bdbe7fc8a5dd9a42563f24e0e2d386a75965b0de42f02eaf92ee6927ea81e4e
6
+ metadata.gz: 9cb1bf01c37e40036dffdd19b14b78fbdf538af45625867eb6cbd45ab66bcb468a91b11a13ab86ca83e2d8692235ed0c6d4d919186d3a8713f9c81723c7978a6
7
+ data.tar.gz: a3fc41c535e1a028526beb8904be1a72b2eee49473e4036059e7b0d4bbeff9b8670bf20b564636709605200aeede0c928bf9c9613146339a01803d396c570c05
data/README.md CHANGED
@@ -66,6 +66,54 @@ To start migration just build your site:
66
66
  bundle exec jekyll build
67
67
  ```
68
68
 
69
+ **Tip:** Files can also be JSON, TSV and YAML, since they're all
70
+ supported by Jekyll.
71
+
72
+ ### Wordpress
73
+
74
+ Instead of requiring you to install and configure MariaDB/MySQL, you can
75
+ convert the database into SQLite3 like this:
76
+
77
+ ```bash
78
+ git clone https://0xacab.org/sutty/mysql2sqlite.git
79
+ cd mysql2sqlite
80
+ ./mysql2sqlite /path/to/database/dump.sql |
81
+ sed -re "s/, 0x([0-9a-f]+),/, X'\1',/i" |
82
+ sqlite3 wordpress.sqlite3
83
+ ```
84
+
85
+ It will probably show some errors.
86
+
87
+ Note the `sed` command is required to convert hexadecimal values into
88
+ SQLite syntax, since `mysql2sqlite` doesn't support this yet.
89
+
90
+ Wordpress websites can include lots of posts and metadata, depending on
91
+ the number of plugins installed. We don't have an official way of
92
+ dumping everything into Jekyll, because you will probably want to move
93
+ things around. You can write a plugin like this:
94
+
95
+ ```ruby
96
+ # _plugins/wordpress.rb
97
+ # frozen_string_literal: true
98
+
99
+ require 'sutty_migration/wordpress'
100
+ require 'sutty_migration/jekyll/document_creator'
101
+ require 'jekyll-write-and-commit-changes'
102
+
103
+ Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
104
+ wp = SuttyMigration::Wordpress.new(site: site, database: 'wordpress.sqlite3', prefix: 'wp_', url: 'https://wordpre.ss')
105
+
106
+ # Download all files
107
+ wp.download_all
108
+
109
+ wp.posts(layout: 'post').each do |post|
110
+ doc = Jekyll::Document.create(site: site, title: post[:post_title], date: post[:post_date], collection: 'posts')
111
+ doc.content = post[:content]
112
+ doc.save
113
+ end
114
+ end
115
+ ```
116
+
69
117
  ## Contributing
70
118
 
71
119
  Bug reports and pull requests are welcome on 0xacab.org at
@@ -1,69 +1,3 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'securerandom'
4
- require 'fast_blank'
5
- require 'jekyll-write-and-commit-changes'
6
-
7
- Jekyll::Hooks.register :site, :post_read do |site|
8
- documents = site.documents
9
-
10
- site.data['layouts']&.each do |name, layout|
11
- site.data.dig('migration', name)&.each do |row|
12
- row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
13
-
14
- document = documents.find do |doc|
15
- doc.data['id'] == row['id']
16
- end
17
-
18
- document ||=
19
- begin
20
- base = "#{row['date'] || Date.today.to_s}-#{Jekyll::Utils.slugify(row['title'], mode: 'latin')}.markdown"
21
- path = File.join(site.source, '_posts', base)
22
-
23
- raise ArgumentError, "Row #{row['id']} duplicates file #{base}" if File.exist? path
24
-
25
- doc = Jekyll::Document.new(path, site: site, collection: site.collections['posts'])
26
- site.collections['posts'] << doc
27
-
28
- doc
29
- end
30
-
31
- row.each do |attribute, value|
32
- row[attribute] =
33
- case layout.dig(attribute, 'type')
34
- when 'string' then value
35
- when 'text' then value
36
- when 'tel' then value
37
- when 'color' then value # TODO: validar
38
- when 'date' then Jekyll::Utils.parse_date(value)
39
- when 'email' then value # TODO: validar
40
- when 'url' then value # TODO: validar
41
- when 'content' then value
42
- when 'markdown_content' then value
43
- when 'markdown' then value
44
- when 'number' then value.to_i
45
- when 'order' then value.to_i
46
- when 'boolean' then !value.strip.empty?
47
- when 'array' then value.split(',').map(&:strip)
48
- # TODO: procesar los valores en base a los valores predefinidos
49
- when 'predefined_array' then value.split(',').map(&:strip)
50
- when 'image' then { 'path' => value, 'description' => '' }
51
- when 'file' then { 'path' => value, 'description' => '' }
52
- when 'geo' then %w[lat lng].zip(value.split(',', 2).map(&:to_f)).to_h
53
- when 'belongs_to' then value
54
- when 'has_many' then value.split(',').map(&:strip)
55
- when 'has_and_belongs_to_many' then value.split(',').map(&:strip)
56
- when 'related_posts' then value.split(',').map(&:strip)
57
- when 'locales' then value.split(',').map(&:strip)
58
- else value
59
- end
60
- end
61
-
62
- document.data['uuid'] ||= SecureRandom.uuid
63
- document.content = row.delete('content')
64
-
65
- document.data.merge! row
66
- document.save
67
- end
68
- end
69
- end
3
+ require_relative 'sutty_migration/data'
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Expandir String para poder verificar si está vacía
4
+ require 'fast_blank'
5
+
6
+ # Verificar que los valores nulos estén vacíos
7
+ class NilClass
8
+ def blank?
9
+ true
10
+ end
11
+
12
+ def present?
13
+ false
14
+ end
15
+ end
16
+
17
+ # Verificar que una fecha está vacía
18
+ class Time
19
+ def blank?
20
+ false
21
+ end
22
+
23
+ def present?
24
+ true
25
+ end
26
+ end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'securerandom'
4
+ require_relative 'core_extensions'
5
+ require_relative 'jekyll/document_creator'
6
+
7
+ # Registers a plugin for converting CSV files into posts following
8
+ # Sutty's layout definition.
9
+ #
10
+ # If jekyll-write-and-commit-changes is enabled, documents will be saved
11
+ # on disk and committed if the build command is run with
12
+ # JEKYLL_ENV=production
13
+ Jekyll::Hooks.register :site, :post_read, priority: :low do |site|
14
+ documents = site.documents
15
+
16
+ site.data['layouts']&.each do |name, layout|
17
+ site.data.dig('migration', name)&.each do |row|
18
+ row['date'] = Jekyll::Utils.parse_date(row['date']) unless row['date'].blank?
19
+ row['date'] ||= Time.now
20
+
21
+ unless row['id'].blank?
22
+ document = documents.find do |doc|
23
+ doc.data['id'] == row['id']
24
+ end
25
+ end
26
+
27
+ document ||= Jekyll::Document.create(site: site, collection: 'posts', **row.slice(*%w[date slug title]).transform_keys(&:to_sym))
28
+
29
+ row.each do |attribute, value|
30
+ next unless value.blank?
31
+
32
+ row[attribute] =
33
+ case layout.dig(attribute, 'type')
34
+ when 'string' then value
35
+ when 'text' then value
36
+ when 'tel' then value
37
+ # TODO: validate
38
+ when 'color' then value
39
+ when 'date' then Jekyll::Utils.parse_date(value)
40
+ # TODO: validate
41
+ when 'email' then value
42
+ # TODO: validate
43
+ when 'url' then value
44
+ when 'content' then value
45
+ when 'markdown_content' then value
46
+ when 'markdown' then value
47
+ when 'number' then value.to_i
48
+ when 'order' then value.to_i
49
+ when 'boolean' then !value.strip.empty?
50
+ when 'array' then value.split(',').map(&:strip)
51
+ # TODO: process values from the default array
52
+ when 'predefined_array' then value.split(',').map(&:strip)
53
+ when 'image' then { 'path' => value, 'description' => '' }
54
+ when 'file' then { 'path' => value, 'description' => '' }
55
+ when 'geo' then %w[lat lng].zip(value.split(',', 2).map(&:to_f)).to_h
56
+ when 'belongs_to' then value
57
+ when 'has_many' then value.split(',').map(&:strip)
58
+ when 'has_and_belongs_to_many' then value.split(',').map(&:strip)
59
+ when 'related_posts' then value.split(',').map(&:strip)
60
+ when 'locales' then value.split(',').map(&:strip)
61
+ else value
62
+ end
63
+ end
64
+
65
+ document.data['uuid'] ||= SecureRandom.uuid
66
+ document.content = row.delete('content')
67
+
68
+ document.data.merge! row
69
+ document.save if document.respond_to? :save
70
+ end
71
+ end
72
+
73
+ next unless site.respond_to?(:repository)
74
+ next unless ENV['JEKYLL_ENV'] == 'production'
75
+
76
+ site.repository.commit 'CSV Migration'
77
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'jekyll/utils'
4
+ require_relative '../core_extensions'
5
+
6
+ module SuttyMigration
7
+ module Jekyll
8
+ module DocumentCreator
9
+ class DocumentExists < ArgumentError; end
10
+ def self.included(base)
11
+ base.class_eval do
12
+
13
+ # Creates a new document in a collection or fails if it already
14
+ # exists.
15
+ #
16
+ # @param :site [Jekyll::Site] Jekyll site
17
+ # @param :date [Time] Post date
18
+ # @param :title [String] Post title
19
+ # @param :slug [String] Post slug, slugified title if empty
20
+ # @param :collection [Jekyll::Collection,String] Collection label or collection
21
+ # @return [Jekyll::Document] A new document
22
+ def self.create(site:, date:, title:, slug: nil, collection:)
23
+ collection = site.collections[collection] if collection.is_a? String
24
+ slug = ::Jekyll::Utils.slugify(title, mode: 'latin') if slug.blank?
25
+ basename = "#{date.strftime('%F')}-#{slug}.markdown"
26
+ path = File.join(collection.directory, basename)
27
+
28
+ raise DocumentExists, "#{path} already exists" if File.exist? path
29
+
30
+ ::Jekyll::Document.new(path, site: site, collection: collection).tap do |document|
31
+ collection.docs << document
32
+ document.data['title'] = title
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
40
+
41
+ ::Jekyll::Document.include SuttyMigration::Jekyll::DocumentCreator
@@ -0,0 +1,255 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'time'
4
+ require 'securerandom'
5
+ require 'sequel'
6
+ require 'sqlite3'
7
+ require 'json'
8
+ require 'faraday'
9
+ require 'progressbar'
10
+ require 'jekyll/utils'
11
+
12
+ module SuttyMigration
13
+ # Brings posts and attachments from a SQLite3 database. You can
14
+ # convert a MySQL/MariaDB dump by using `mysql2sqlite`.
15
+ #
16
+ # It doesn't convert them into Jekyll posts but allows you to write a
17
+ # migration plugin where you can convert data by yourself. We may add
18
+ # this feature in the future.
19
+ class Wordpress
20
+ attr_reader :site, :prefix, :limit, :url, :wp, :database, :multisite
21
+
22
+ # @param :site [Jekyll::Site] Jekyll site
23
+ # @param :url [String] Wordpress site URL (must be up for downloads)
24
+ # @param :database [String] Database path, by default `_data/wordpress.sqlite3`
25
+ # @param :prefix [String] WP table prefix
26
+ # @param :limit [Integer] Page length
27
+ # @param :multisite [Boolean] Site is multisite
28
+ def initialize(site:, url:, database: nil, prefix: 'wp_', limit: 10, multisite: nil)
29
+ @site = site
30
+ @prefix = prefix.freeze
31
+ @limit = limit.freeze
32
+ @url = url.freeze
33
+ @database = database || File.join(site.source, '_data', 'wordpress.sqlite3')
34
+ @multisite = multisite
35
+ end
36
+
37
+ # Generate database connections for a multisite WP
38
+ #
39
+ # @return [Hash] { blog_id => blog row Hash with its SuttyMigration::Wordpress under :db }
40
+ def blogs
41
+ @blogs ||= wp["select * from #{prefix}blogs"].to_a.map do |blog|
42
+ url = "https://#{blog[:domain]}#{blog[:path]}"
43
+ pfx = "#{prefix}#{blog[:blog_id]}_" if blog[:blog_id] > 1
44
+ pfx ||= prefix
45
+
46
+ [ blog[:blog_id], blog.merge(db: self.class.new(site: site, url: url, prefix: pfx, database: database, limit: limit, multisite: self)) ]
47
+ end.to_h
48
+ end
49
+
50
+ def options
51
+ @options ||= wp["select option_name, option_value from #{prefix}options"].to_a.map(&:values).to_h.transform_keys(&:to_sym)
52
+ end
53
+
54
+ # Open the database.
55
+ #
56
+ # @return [Sequel::SQLite::Database]
57
+ def wp
58
+ @wp ||= Sequel.sqlite(database).tap do |db|
59
+ db.extension :pagination
60
+ end
61
+ end
62
+
63
+ # Download all attachments. Adds the local path to them.
64
+ #
65
+ # @param :progress [Boolean] Toggle progress bar
66
+ # @return [Nil]
67
+ def download_all(progress: true)
68
+ posts(layout: 'attachment').each do |attachment|
69
+ attachment[:front_matter]['file_path'] = download(url: attachment[:guid], progress: progress)
70
+ end
71
+ end
72
+
73
+ # Downloads a file if needed, optionally showing a progress bar.
74
+ #
75
+ # @param :url [String] File URL
76
+ # @param :progress [Boolean] Toggle progress bar
77
+ # @return [String] File local path
78
+ def download(url:, progress: true)
79
+ uri = URI(url)
80
+ dest = uri.path.sub(%r{\A/}, '')
81
+ full = File.join(site.source, dest)
82
+
83
+ return dest if File.exist? full
84
+
85
+ ::Jekyll.logger.info "Downloading #{dest}"
86
+
87
+ FileUtils.mkdir_p File.dirname(full)
88
+
89
+ File.open(full, 'w') do |f|
90
+ if progress
91
+ head = Faraday.head(url)
92
+ content_length = head.headers['content-length'].to_i
93
+ progress = ProgressBar.create(title: File.basename(dest), total: content_length, output: $stderr)
94
+ end
95
+
96
+ Faraday.get(url) do |req|
97
+ req.options.on_data = Proc.new do |chunk, downloaded_bytes|
98
+ f.write chunk
99
+
100
+ if progress
101
+ progress.progress = (downloaded_bytes > content_length) ? content_length : downloaded_bytes
102
+ end
103
+ end
104
+ end
105
+ end
106
+
107
+ dest
108
+ end
109
+
110
+ # List post types
111
+ #
112
+ # @return [Array]
113
+ def layouts
114
+ @layouts ||= wp["select distinct post_type from #{prefix}posts"].to_a.map(&:values).flatten
115
+ end
116
+
117
+ # Finds all posts optionally filtering by post type. This is not
118
+ # the official Sequel syntax, but it retrieves metadata as objects
119
+ # with a single query (and a sub-query).
120
+ #
121
+ # @param :layout [String] Layout name, one of #layouts
122
+ # @param :with_meta [Boolean] Toggle metadata pulling and conversion
123
+ # @return [Array]
124
+ def posts(**options)
125
+ unless options[:layout].blank? || layouts.include?(options[:layout])
126
+ raise ArgumentError, "#{options[:layout]} must be one of #{layouts.join(', ')}"
127
+ end
128
+
129
+ wp[post_query(**options)].each_page(limit).to_a.map(&:to_a).flatten.tap do |p|
130
+ p.map do |post|
131
+ # Sequel parses dates on localtime
132
+ post[:date] = ::Jekyll::Utils.parse_date(post[:date]) unless post[:date].blank?
133
+ post[:last_modified_at] = ::Jekyll::Utils.parse_date(post[:last_modified_at]) unless post[:last_modified_at].blank?
134
+
135
+ post[:front_matter] = JSON.parse(post[:front_matter]).transform_keys(&:to_sym).transform_values { |v| v.size == 1 ? v.first : v } unless post[:front_matter].blank?
136
+ post[:terms] = JSON.parse(post[:terms]).transform_keys(&:to_sym) unless post[:terms].blank?
137
+ end
138
+ end
139
+ end
140
+
141
+ # Brings all users.
142
+ #
143
+ # @param :with_meta [Boolean] include metadata
144
+ # @return [Array]
145
+ def users(**options)
146
+ options[:with_meta] = true unless options.key? :with_meta
147
+
148
+ wp[user_query(**options)].each_page(limit).to_a.map(&:to_a).flatten.tap do |u|
149
+ next unless options[:with_meta]
150
+
151
+ u.map do |user|
152
+ user[:meta] = JSON.parse(user[:meta]).transform_keys(&:to_sym) unless user[:meta].blank?
153
+ end
154
+ end
155
+ end
156
+
157
+ private
158
+
159
+ # Finds all users. If it's a multisite WP, we need to check the
160
+ # main table.
161
+ #
162
+ # @param :with_meta [Boolean] include metadata
163
+ # @return [String]
164
+ def user_query(with_meta: true)
165
+ pfx = multisite&.prefix || prefix
166
+
167
+ <<~EOQ
168
+ select
169
+ u.*
170
+ #{", json_group_object(m.meta_key, m.meta_value) as meta" if with_meta}
171
+ from #{pfx}users as u
172
+ #{"left join #{pfx}usermeta as m on m.user_id = u.id" if with_meta}
173
+ group by u.id
174
+ EOQ
175
+ end
176
+
177
+ # Query for posts, optionally bringing metadata as JSON objects.
178
+ #
179
+ # @param :layout [String] Layout name
180
+ # @param :with_meta [Boolean] Query metadata
181
+ # @return [String]
182
+ def post_query(layout: nil, with_meta: true)
183
+ <<~EOQ
184
+ select
185
+ p.ID as id,
186
+ strftime('%Y-%m-%d %H:%M:%S UTC', p.post_date_gmt) as date,
187
+ strftime('%Y-%m-%d %H:%M:%S UTC', p.post_modified_gmt) as last_modified_at,
188
+ p.post_author as author,
189
+ p.post_type as layout,
190
+ p.post_name as slug,
191
+ p.post_title as title,
192
+ p.post_content as content,
193
+ p.post_excerpt as excerpt,
194
+ p.post_status as status,
195
+ p.comment_status as comment_status,
196
+ p.ping_status as ping_status,
197
+ p.post_password as password,
198
+ p.to_ping as to_ping,
199
+ p.pinged as pinged,
200
+ p.post_content_filtered as content_filtered,
201
+ p.post_parent as parent,
202
+ p.guid as guid,
203
+ p.menu_order as menu_order,
204
+ p.post_mime_type as mime_type,
205
+ p.comment_count as comment_count
206
+ #{", f.front_matter as front_matter" if with_meta}
207
+ #{", t.terms as terms" if with_meta}
208
+ from #{prefix}posts as p
209
+ #{"left join (#{meta_query}) as f on f.post_id = p.ID" if with_meta}
210
+ #{"left join (#{terms_query(layout: layout)}) as t on t.id = p.ID" if with_meta}
211
+ #{"where p.post_type = '#{layout}'" if layout}
212
+ group by p.ID
213
+ EOQ
214
+ end
215
+
216
+ # Recover the post meta as a JSON object with multiple values
217
+ # converted to arrays
218
+ #
219
+ # @return [String]
220
+ def meta_query
221
+ <<~EOQ
222
+ select
223
+ post_id,
224
+ json_group_object(meta_key, json(meta_values)) as front_matter
225
+ from (
226
+ select
227
+ post_id,
228
+ meta_key,
229
+ json_group_array(meta_value) as meta_values
230
+ from #{prefix}postmeta
231
+ group by post_id, meta_key
232
+ )
233
+ group by post_id
234
+ EOQ
235
+ end
236
+
237
+ # Term taxonomy query
238
+ #
239
+ # @param :layout [String] Layout name
240
+ # @return [String]
241
+ def terms_query(layout: nil)
242
+ <<~EOQ
243
+ select
244
+ p.ID as id,
245
+ json_group_object(tt.taxonomy, t.name) as terms
246
+ from #{prefix}posts as p
247
+ left join #{prefix}term_relationships as r on r.object_id = p.ID
248
+ left join #{prefix}term_taxonomy as tt on tt.term_taxonomy_id = r.term_taxonomy_id
249
+ left join #{prefix}terms as t on t.term_id = tt.term_id
250
+ #{"where p.post_type = '#{layout}'" if layout}
251
+ group by p.ID
252
+ EOQ
253
+ end
254
+ end
255
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sutty-migration
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - f
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-06 00:00:00.000000000 Z
11
+ date: 2021-06-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jekyll
@@ -52,6 +52,76 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: faraday
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.4'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.4'
69
+ - !ruby/object:Gem::Dependency
70
+ name: progressbar
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.11'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.11'
83
+ - !ruby/object:Gem::Dependency
84
+ name: sqlite3
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.4'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.4'
97
+ - !ruby/object:Gem::Dependency
98
+ name: sequel
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '5.45'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '5.45'
111
+ - !ruby/object:Gem::Dependency
112
+ name: pry
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
55
125
  description: Takes datafiles and converts them into posts
56
126
  email:
57
127
  - f@sutty.nl
@@ -64,7 +134,10 @@ files:
64
134
  - LICENSE.txt
65
135
  - README.md
66
136
  - lib/sutty-migration.rb
67
- - lib/wordpress.rb
137
+ - lib/sutty_migration/core_extensions.rb
138
+ - lib/sutty_migration/data.rb
139
+ - lib/sutty_migration/jekyll/document_creator.rb
140
+ - lib/sutty_migration/wordpress.rb
68
141
  homepage: https://0xacab.org/sutty/jekyll/sutty-migration
69
142
  licenses:
70
143
  - GPL-3.0
data/lib/wordpress.rb DELETED
@@ -1,174 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # Debug
4
- require 'pry'
5
- # Generar UUIDs
6
- require 'securerandom'
7
- # Traer resultados de la base de datos
8
- require 'sequel'
9
- require 'sqlite3'
10
- require 'json'
11
- # Limpieza de contenido
12
- require 'loofah'
13
- require 'rails/html/scrubbers'
14
- require 'rails/html/sanitizer'
15
- require 'reverse_markdown'
16
- # Descargar archivos
17
- require 'faraday'
18
-
19
- class Wordpress
20
- attr_reader :site, :prefix, :limit, :url
21
-
22
- def initialize(site:, url:, prefix: 'wp_', limit: 10)
23
- @site = site
24
- @prefix = prefix.freeze
25
- @limit = limit.freeze
26
- @url = url.freeze
27
-
28
- # Conectarse a la base de datos
29
- @wp = Sequel.sqlite(File.join(site.source, '_data', 'wordpress', 'post.sqlite3'))
30
- # Las funciones de JSON usan mucha CPU, vamos a traer de a pocos
31
- # registros.
32
- @wp.extension :pagination
33
- end
34
-
35
- def download(file)
36
- dest = 'wp-content/uploads/' + file
37
- full = File.join(site.source, dest)
38
-
39
- return dest if File.exist? full
40
-
41
- Jekyll.logger.info "Downloading #{dest}"
42
-
43
- FileUtils.mkdir_p File.dirname(full)
44
-
45
- File.open(full, 'w') do |f|
46
- Faraday.get(url + '/' + dest) do |req|
47
- req.options.on_data = Proc.new do |chunk, _|
48
- f.write chunk
49
- end
50
- end
51
- end
52
-
53
- dest
54
- end
55
-
56
- # Obtiene todos los posts opcionalmente filtrando por tipo de post.
57
- # No es la forma oficial de Sequel pero no tenemos tiempo de
58
- # aprenderla específicamente y además tenemos las opciones en formato
59
- # JSON que no estarían soportadas.
60
- def posts(layout: nil)
61
- query = post_query.dup
62
- query += " where post_type = '#{layout}'" if layout
63
- query += ' group by posts.ID'
64
-
65
- @wp[query].each_page(limit)
66
- end
67
-
68
- def meta(id:)
69
- @wp[meta_query(id: id)].to_a
70
- end
71
-
72
- private
73
-
74
- # Obtener todos los posts, json_objectagg requiere mariadb 10.5
75
- def post_query
76
- @post_query ||= <<~EOQ
77
- select ID as id,
78
- post_title as title,
79
- post_name as slug,
80
- post_type as layout,
81
- strftime('%Y-%m-%d', post_date) as date,
82
- post_status as status,
83
- post_content as content,
84
- json_group_object(meta_key, meta_value) as data
85
- from #{prefix}posts as posts
86
- left join #{prefix}postmeta as frontmatter
87
- on posts.ID = frontmatter.post_id
88
- EOQ
89
- end
90
-
91
- def meta_query(id:)
92
- <<~EOQ
93
- SELECT
94
- terms.name AS `name`,
95
- ttax.taxonomy AS `type`,
96
- ttax.parent AS `parent`,
97
- ttax.term_id AS `id`
98
- FROM
99
- #{prefix}terms AS `terms`,
100
- #{prefix}term_relationships AS `trels`,
101
- #{prefix}term_taxonomy AS `ttax`
102
- WHERE
103
- trels.object_id = '#{id}' AND
104
- trels.term_taxonomy_id = ttax.term_taxonomy_id AND
105
- terms.term_id = ttax.term_id
106
- EOQ
107
- end
108
- end
109
-
110
- # Antes de generar el sitio vamos a leer todos los artículos desde la
111
- # base de datos y generarlos localmente.
112
- Jekyll::Hooks.register :site, :post_read do |site|
113
- wp = Wordpress.new(site: site,
114
- url: site.config.dig('wordpress', 'url'),
115
- prefix: site.config.dig('wordpress', 'prefix'))
116
-
117
- collection = site.collections['posts']
118
- ascii_re = Regexp.new("\P{ASCII}").freeze
119
- sanitizer = Rails::Html::SafeListSanitizer.new
120
-
121
- # Traer todas las imágenes cargadas y descargarlas
122
- attachments = wp.posts(layout: 'attachment').map do |page|
123
- page.map do |attachment|
124
- attachment[:data] = JSON.parse(attachment[:data]) unless attachment[:data].nil?
125
- file = attachment.dig(:data, '_wp_attached_file')
126
-
127
- next unless file
128
-
129
- dest = wp.download(file)
130
-
131
- # Tener un mapa de IDs y archivos destino
132
- [ attachment[:id], dest ]
133
- end
134
- end.compact.flatten(1).to_h
135
-
136
- %w[post page].each do |type|
137
- wp.posts(layout: type).each do |page|
138
- page.each do |post|
139
- # Convertir los datos extra en un Hash
140
- post[:data] = JSON.parse(post[:data]) unless post[:data].nil?
141
- post[:slug] = Jekyll::Utils.slugify(post[:title], mode: 'latin') if post[:slug].empty?
142
- post[:meta] = wp.meta id: post[:id]
143
-
144
- path = File.join(site.source, '_posts', post.slice(:date, :slug).values.join('-') + '.markdown')
145
-
146
- if File.exist? path
147
- Jekyll.logger.info "#{path} ya fue migrado, actualizando"
148
-
149
- doc = site.documents.find do |d|
150
- d['id'] == post[:id]
151
- end
152
- else
153
- # Crear un post nuevo y agregarlo a la colección
154
- collection.docs << doc = Jekyll::Document.new(path, site: site, collection: collection)
155
- doc.data['uuid'] = SecureRandom.uuid
156
- end
157
-
158
- thumbnail = post.dig(:data, '_thumbnail_id')&.to_i
159
-
160
- doc.data['layout'] = type
161
- doc.data['title'] = post[:title]
162
- doc.data['draft'] = post[:status] != 'publish'
163
- doc.data['id'] = post[:id]
164
- doc.data['date'] = Jekyll::Utils.parse_date(post[:date])
165
- doc.data['tags'] = post[:meta].select { |k| k[:type] == 'post_tag' }.map { |k| k[:name] }
166
- doc.data['categories'] = post[:meta].select { |k| k[:type] == 'category' }.map { |k| k[:name] }
167
- doc.data['image'] = attachments[thumbnail] if thumbnail
168
-
169
- doc.content = ReverseMarkdown.convert(sanitizer.sanitize(post[:content]))
170
- doc.save
171
- end
172
- end
173
- end
174
- end