wordpress-exporter 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +39 -0
- data/.rspec +3 -0
- data/.travis.yml +8 -0
- data/CHANGELOG.md +4 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +75 -0
- data/LICENSE +22 -0
- data/README.md +71 -0
- data/Rakefile +8 -0
- data/bin/wordpress-exporter +46 -0
- data/lib/cli.rb +11 -0
- data/lib/configuration.rb +28 -0
- data/lib/converters/content_types_structure_creator.rb +58 -0
- data/lib/converters/contentful_model_to_json.rb +119 -0
- data/lib/converters/markup_converter.rb +35 -0
- data/lib/migrator.rb +30 -0
- data/lib/version.rb +3 -0
- data/lib/wordpress/blog.rb +83 -0
- data/lib/wordpress/category.rb +54 -0
- data/lib/wordpress/export.rb +30 -0
- data/lib/wordpress/post.rb +94 -0
- data/lib/wordpress/post_attachment.rb +44 -0
- data/lib/wordpress/post_category_domain.rb +71 -0
- data/lib/wordpress/tag.rb +56 -0
- data/spec/fixtures/blog/assets/attachment_post/attachment_post_15.json +5 -0
- data/spec/fixtures/blog/assets/attachment_post/attachment_post_21.json +5 -0
- data/spec/fixtures/blog/entries/blog/blog_1.json +62 -0
- data/spec/fixtures/blog/entries/category/category_11599757.json +5 -0
- data/spec/fixtures/blog/entries/category/category_14786.json +5 -0
- data/spec/fixtures/blog/entries/category/category_2214351.json +5 -0
- data/spec/fixtures/blog/entries/category/category_8076.json +5 -0
- data/spec/fixtures/blog/entries/post/post_1.json +7 -0
- data/spec/fixtures/blog/entries/post/post_11.json +31 -0
- data/spec/fixtures/blog/entries/post/post_15.json +11 -0
- data/spec/fixtures/blog/entries/post/post_21.json +11 -0
- data/spec/fixtures/blog/entries/post/post_3.json +13 -0
- data/spec/fixtures/blog/entries/post/post_5.json +13 -0
- data/spec/fixtures/blog/entries/post/post_9.json +13 -0
- data/spec/fixtures/blog/entries/tag/tag_2656354.json +5 -0
- data/spec/fixtures/blog/entries/tag/tag_306830130.json +5 -0
- data/spec/fixtures/default_contentful_structure.json +78 -0
- data/spec/fixtures/wordpress.xml +551 -0
- data/spec/lib/configuration_spec.rb +23 -0
- data/spec/lib/converters/markup_converter_spec.rb +27 -0
- data/spec/lib/wordpress/blog_spec.rb +64 -0
- data/spec/lib/wordpress/category_spec.rb +39 -0
- data/spec/lib/wordpress/export_spec.rb +27 -0
- data/spec/lib/wordpress/post_category_domain_spec.rb +41 -0
- data/spec/lib/wordpress/post_spec.rb +41 -0
- data/spec/lib/wordpress/tag_spec.rb +39 -0
- data/spec/spec_helper.rb +13 -0
- data/spec/support/db_rows_json.rb +5 -0
- data/spec/support/shared_configuration.rb +13 -0
- data/wordpress_exporter.gemspec +33 -0
- data/wordpress_settings/contentful_model.json +288 -0
- data/wordpress_settings/contentful_structure.json +78 -0
- data/wordpress_settings/default_contentful_structure.json +78 -0
- data/wordpress_settings/export_wordpress.xml +380 -0
- data/wordpress_settings/wordpress.xml +570 -0
- data/wordpress_settings/wordpress_settings.yml +13 -0
- metadata +288 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
require 'reverse_markdown'
|
|
2
|
+
|
|
3
|
+
module Contentful
  module Converter
    # Rewrites the `content` field of every exported post file from markup
    # to Markdown, in place.
    class MarkupConverter
      # Load-time side effect: switches the process-wide default external
      # encoding to UTF-8.
      Encoding.default_external = 'utf-8'

      attr_reader :config, :logger

      # @param config [#entries_dir] settings object exposing the entries directory
      def initialize(config)
        @config = config
        @logger = Logger.new(STDOUT)
      end

      # Converts every file found directly under "<entries_dir>/post".
      def convert_markup_to_markdown
        post_files_pattern = "#{config.entries_dir}/post/*"
        Dir.glob(post_files_pattern) do |post_file_path|
          logger.info("Converting #{post_file_path} markups...")
          convert_post_content(post_file_path)
        end
      end

      # Parses one post JSON file, converts its 'content' value with
      # ReverseMarkdown and writes the result back to the same path.
      #
      # @param post_file_path [String] path of the post JSON file
      def convert_post_content(post_file_path)
        post = JSON.parse(File.read(post_file_path))
        post['content'] = ReverseMarkdown.convert(post['content'])
        overwrite_file(post_file_path, post)
      end

      # Replaces the file at +path+ with the pretty-printed JSON form of +data+.
      #
      # @param path [String] file to overwrite
      # @param data [Object] JSON-serializable value
      def overwrite_file(path, data)
        File.write(path, JSON.pretty_generate(data))
      end
    end
  end
end
|
data/lib/migrator.rb
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
require_relative 'configuration'
|
|
2
|
+
require_relative 'wordpress/export'
|
|
3
|
+
require_relative 'converters/contentful_model_to_json'
|
|
4
|
+
require_relative 'converters/markup_converter'
|
|
5
|
+
|
|
6
|
+
# Entry point that maps a command-line action to the exporter or one of the
# converters. All collaborators are built eagerly from the given settings.
class Migrator
  attr_reader :exporter, :settings, :converter, :markup_converter

  # @param settings [Object] raw settings handed to Contentful::Configuration
  def initialize(settings)
    @settings = Contentful::Configuration.new(settings)
    @exporter = Contentful::Exporter::Wordpress::Export.new(@settings)
    @converter = Contentful::Converter::ContentfulModelToJson.new(@settings)
    @markup_converter = Contentful::Converter::MarkupConverter.new(@settings)
  end

  # Runs the step selected by +action+. An action that matches none of the
  # known flags is ignored and nil is returned.
  #
  # @param action [#to_s] one of '--extract-to-json',
  #   '--convert-content-model-to-json', '--create-contentful-model-from-json'
  #   or '--convert-markup'
  # @param opts [Hash] options; :omit_content_model skips the content type
  #   JSON creation after '--extract-to-json'
  def run(action, opts = {})
    case action.to_s
    when '--extract-to-json'
      exporter.export_blog
      omit_flag = option_present?(opts[:omit_content_model])
      converter.create_content_type_json(omit_flag) unless omit_flag
    when '--convert-content-model-to-json'
      converter.convert_to_import_form
    when '--create-contentful-model-from-json'
      converter.create_content_type_json
    when '--convert-markup'
      markup_converter.convert_markup_to_markdown
    end
  end

  private

  # Plain-Ruby stand-in for ActiveSupport's `present?`, so this class does not
  # depend on that core extension being loaded: nil, false, whitespace-only
  # strings and empty collections count as absent.
  def option_present?(value)
    return false unless value
    return (value =~ /\A[[:space:]]*\z/).nil? if value.is_a?(String)
    return !value.empty? if value.respond_to?(:empty?)

    true
  end
end
|
data/lib/version.rb
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
require 'time'
|
|
2
|
+
require 'logger'
|
|
3
|
+
|
|
4
|
+
module Contentful
  module Exporter
    module Wordpress
      # Extracts blog-level data from a WordPress export document and writes
      # it as JSON. Also holds the file, logging and link helpers used by the
      # other extractor classes that inherit from it.
      class Blog
        attr_reader :xml, :settings

        # @param xml_document [Object] parsed WordPress export document
        # @param settings [Object] configuration exposing data_dir and entries_dir
        def initialize(xml_document, settings)
          @xml = xml_document
          @settings = settings
        end

        # Creates the data directory, then extracts and writes the blog entry.
        def blog_extractor
          create_directory(settings.data_dir)
          extract_blog
        end

        # Reduces every entry hash, in place, to its :id plus type 'Entry'.
        #
        # @param entries [Array<Hash>] entry hashes keyed by symbols
        # @return [Array<Hash>] the same (mutated) array
        def link_entry(entries)
          entries.each do |entry|
            entry.keep_if { |key, _value| key == :id }
            entry[:type] = 'Entry'
          end
        end

        # Reduces the asset hash, in place, to its :id plus type 'File'.
        #
        # @param asset [Hash] asset hash keyed by symbols
        # @return [Hash] the same (mutated) hash
        def link_asset(asset)
          asset.keep_if { |key, _value| key == :id }
          asset.merge!(type: 'File')
        end

        # Creates +path+ (including parents) unless it is already a directory.
        def create_directory(path)
          FileUtils.mkdir_p(path) unless File.directory?(path)
        end

        # Writes +data+ to +path+ as pretty-printed JSON, replacing any
        # existing file.
        def write_json_to_file(path, data)
          File.write(path, JSON.pretty_generate(data))
        end

        # Logger writing to STDOUT; built once per instance and reused.
        def output_logger
          @output_logger ||= Logger.new(STDOUT)
        end

        private

        # Writes the blog entry to "<entries_dir>/blog/blog_1.json".
        def extract_blog
          output_logger.info('Extracting blog data...')
          create_directory("#{settings.entries_dir}/blog")
          blog = extracted_data
          write_json_to_file("#{settings.entries_dir}/blog/blog_1.json", blog)
        end

        # Blog entry hash: title plus links to all posts, categories and tags.
        # Building it runs the post, category and tag extractors.
        def extracted_data
          {
            id: 'blog_id',
            title: title,
            posts: link_entry(posts),
            categories: link_entry(categories),
            tags: link_entry(tags)
          }
        end

        def posts
          Post.new(xml, settings).post_extractor
        end

        def categories
          Category.new(xml, settings).categories_extractor
        end

        def tags
          Tag.new(xml, settings).tags_extractor
        end

        # Text of the first <title> element in the document.
        def title
          xml.at_xpath('//title').text
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
require_relative 'blog'
|
|
2
|
+
|
|
3
|
+
module Contentful
  module Exporter
    module Wordpress
      # Extracts the blog-level categories (`wp:category` elements) and
      # writes one JSON file per category.
      class Category < Blog
        # @param xml [Object] parsed WordPress export document
        # @param settings [Object] configuration exposing entries_dir
        def initialize(xml, settings)
          @xml = xml
          @settings = settings
        end

        # Creates "<entries_dir>/category" and extracts every category into it.
        #
        # @return [Array<Hash>] the extracted category hashes
        def categories_extractor
          output_logger.info 'Extracting blog categories...'
          create_directory("#{settings.entries_dir}/category")
          extract_categories
        end

        private

        # Writes each category to "<entries_dir>/category/<id>.json" and
        # collects the written hashes.
        def extract_categories
          categories.map do |category_node|
            extracted_category(category_node).tap do |normalized|
              write_json_to_file("#{settings.entries_dir}/category/#{id(category_node)}.json", normalized)
            end
          end
        end

        # Hash form of a single category element.
        def extracted_category(category)
          { id: id(category), nicename: nice_name(category), name: name(category) }
        end

        # All `wp:category` elements found anywhere in the document.
        def categories
          xml.xpath('//wp:category').to_a
        end

        # Entry id: the category's `wp:term_id` text prefixed with "category_".
        def id(category)
          term_id = category.xpath('wp:term_id').text
          "category_#{term_id}"
        end

        def nice_name(category)
          category.xpath('wp:category_nicename').text
        end

        def name(category)
          category.xpath('wp:cat_name').text
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
require 'open-uri'
|
|
3
|
+
require 'fileutils'
|
|
4
|
+
require 'json'
|
|
5
|
+
|
|
6
|
+
require_relative 'blog'
|
|
7
|
+
require_relative 'post'
|
|
8
|
+
require_relative 'category'
|
|
9
|
+
require_relative 'tag'
|
|
10
|
+
require_relative 'post_category_domain'
|
|
11
|
+
require_relative 'post_attachment'
|
|
12
|
+
|
|
13
|
+
module Contentful
  module Exporter
    module Wordpress
      # Loads the WordPress export XML named in the settings and starts the
      # blog extraction.
      class Export
        attr_reader :wordpress_xml, :settings

        # Parses the export file eagerly. The block form of File.open closes
        # the file handle as soon as parsing is done instead of leaving it
        # open for the life of the process.
        #
        # @param settings [#wordpress_xml] configuration giving the XML file path
        def initialize(settings)
          @settings = settings
          @wordpress_xml = File.open(settings.wordpress_xml) { |file| Nokogiri::XML(file) }
        end

        # Runs the blog extractor over the parsed document.
        def export_blog
          Blog.new(wordpress_xml, settings).blog_extractor
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
require_relative 'blog'
|
|
2
|
+
|
|
3
|
+
module Contentful
  module Exporter
    module Wordpress
      # Extracts every `item` element of the export as a post entry and
      # writes one JSON file per post.
      class Post < Blog
        attr_reader :xml, :settings

        # @param xml [Object] parsed WordPress export document
        # @param settings [Object] configuration exposing entries_dir
        def initialize(xml, settings)
          @xml = xml
          @settings = settings
        end

        # Creates "<entries_dir>/post" and extracts all posts into it.
        #
        # @return [Array<Hash>] the extracted post hashes
        def post_extractor
          output_logger.info 'Extracting posts...'
          create_directory("#{settings.entries_dir}/post")
          extract_posts
        end

        # Entry id: the item's `wp:post_id` text prefixed with "post_".
        def post_id(post)
          wordpress_id = post.xpath('wp:post_id').text
          "post_#{wordpress_id}"
        end

        private

        # Writes each post to "<entries_dir>/post/<post id>.json" and collects
        # the written hashes.
        def extract_posts
          posts.map do |post_xml|
            normalized_post = extract_data(post_xml)
            write_json_to_file("#{settings.entries_dir}/post/#{post_id(post_xml)}.json", normalized_post)
            normalized_post
          end
        end

        # All `item` elements found anywhere in the document.
        def posts
          xml.xpath('//item').to_a
        end

        # Basic post fields plus links to its attachment, tags and categories.
        def extract_data(xml_post)
          basic_post_data(xml_post).tap do |post_entry|
            assign_content_elements_to_post(xml_post, post_entry)
          end
        end

        def attachment(xml_post)
          PostAttachment.new(xml_post, settings).attachment_extractor
        end

        def tags(xml_post)
          PostCategoryDomain.new(xml, xml_post, settings).extract_tags
        end

        def categories(xml_post)
          PostCategoryDomain.new(xml, xml_post, settings).extract_categories
        end

        # Fields read directly from the item element; created_at is the
        # `wp:post_date` text parsed with Date.strptime.
        def basic_post_data(xml_post)
          creation_date = Date.strptime(created_at(xml_post))
          {
            id: post_id(xml_post),
            title: title(xml_post),
            wordpress_url: url(xml_post),
            content: content(xml_post),
            created_at: creation_date
          }
        end

        # Adds :attachment, :tags and :categories links to +post_entry+; each
        # key is added only when something was extracted for it.
        def assign_content_elements_to_post(xml_post, post_entry)
          asset = attachment(xml_post)
          tag_links = link_entry(tags(xml_post))
          category_links = link_entry(categories(xml_post))
          post_entry.merge!(attachment: link_asset(asset)) if asset
          post_entry.merge!(tags: tag_links) unless tag_links.empty?
          post_entry.merge!(categories: category_links) unless category_links.empty?
        end

        def title(xml_post)
          xml_post.xpath('title').text
        end

        def url(xml_post)
          xml_post.xpath('link').text
        end

        def content(xml_post)
          xml_post.xpath('content:encoded').text
        end

        def created_at(xml_post)
          xml_post.xpath('wp:post_date').text
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
require_relative 'post'
|
|
2
|
+
|
|
3
|
+
module Contentful
  module Exporter
    module Wordpress
      # Extracts the attachment (if any) carried by a single post element
      # and writes it as an asset JSON file.
      class PostAttachment < Post
        attr_reader :post, :settings

        # @param post [Object] the post (`item`) XML element
        # @param settings [Object] configuration exposing assets_dir
        def initialize(post, settings)
          @post = post
          @settings = settings
        end

        # Creates "<assets_dir>/attachment_post" and, when the post has a
        # `wp:attachment_url`, writes the asset file there.
        #
        # @return [Hash, nil] the asset hash, or nil when the post has no
        #   attachment URL
        def attachment_extractor
          create_directory("#{settings.assets_dir}/attachment_post")
          asset = { id: attachment_id, description: attachment_description, url: attachment_url }
          return if asset[:url].nil?

          write_json_to_file("#{settings.assets_dir}/attachment_post/#{attachment_id}.json", asset)
          asset
        end

        private

        # Text of the post's `wp:attachment_url` element, or nil when absent.
        def attachment_url
          url_node = post.at_xpath('wp:attachment_url')
          url_node.text unless url_node.nil?
        end

        def attachment_id
          "attachment_#{post_id(post)}"
        end

        # Value of the '_wp_attachment_image_alt' postmeta entry, or nil when
        # the post has no such entry. Uses `find` rather than `each` so a miss
        # yields nil instead of the whole postmeta list.
        def attachment_description
          alt_meta = post.xpath('wp:postmeta').to_a.find do |meta|
            key_node = meta.at_xpath('wp:meta_key')
            key_node && key_node.text == '_wp_attachment_image_alt'
          end
          alt_meta.at_xpath('wp:meta_value').text if alt_meta
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
require_relative 'post'
|
|
2
|
+
|
|
3
|
+
module Contentful
  module Exporter
    module Wordpress
      # Resolves the tags and categories attached to one post to the ids of
      # the blog-level `wp:tag` / `wp:category` elements with the same name.
      class PostCategoryDomain < Post
        attr_reader :post, :xml, :settings

        # @param xml [Object] parsed WordPress export document
        # @param post [Object] the post (`item`) XML element
        # @param settings [Object] configuration object
        def initialize(xml, post, settings)
          @xml = xml
          @post = post
          @settings = settings
        end

        # @return [Array<Hash>] one `{ id: 'tag_<term id>' }` hash per post tag
        #   that matches a blog-level tag; unmatched tags are left out
        def extract_tags
          output_logger.info 'Extracting post tags...'
          resolve_domains('category[domain=post_tag]', '//wp:tag')
        end

        # @return [Array<Hash>] one `{ id: 'category_<term id>' }` hash per post
        #   category that matches a blog-level category; unmatched ones are
        #   left out
        def extract_categories
          output_logger.info 'Extracting post categories...'
          resolve_domains('category[domain=category]', '//wp:category')
        end

        private

        # Normalizes every post element matching +selector+ against the blog
        # elements at +blog_path+, dropping those that could not be resolved.
        def resolve_domains(selector, blog_path)
          post_domains(selector)
            .map { |domain| normalized_data(domain, blog_path) }
            .reject(&:empty?)
        end

        def post_domains(domain)
          post.css(domain).to_a
        end

        def blog_domains(domain)
          xml.xpath(domain).to_a
        end

        def id(domain, prefix)
          "#{prefix}#{domain.xpath('wp:term_id').text}"
        end

        def name(domain, name_path)
          domain.xpath(name_path).text
        end

        # Id of the blog-level element whose name equals the text of +domain+,
        # or nil when there is none. Uses `find` rather than `each` so a miss
        # yields nil instead of the list of blog elements.
        def domain_id(domain, domain_path)
          name_path = domain_path_name(domain_path)
          match = blog_domains(domain_path).find do |blog_domain|
            name(blog_domain, name_path) == domain.text
          end
          id(match, prefix_id(domain_path)) if match
        end

        # `{ id: ... }` for a resolved element, `{}` when nothing matched, so
        # callers can filter on `empty?`.
        def normalized_data(domain, path)
          resolved_id = domain_id(domain, path)
          resolved_id ? { id: resolved_id } : {}
        end

        def prefix_id(domain_path)
          '//wp:category' == domain_path ? 'category_' : 'tag_'
        end

        def domain_path_name(domain_path)
          '//wp:category' == domain_path ? 'wp:cat_name' : 'wp:tag_name'
        end
      end
    end
  end
end
|