wordpress-exporter 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +39 -0
  3. data/.rspec +3 -0
  4. data/.travis.yml +8 -0
  5. data/CHANGELOG.md +4 -0
  6. data/Gemfile +7 -0
  7. data/Gemfile.lock +75 -0
  8. data/LICENSE +22 -0
  9. data/README.md +71 -0
  10. data/Rakefile +8 -0
  11. data/bin/wordpress-exporter +46 -0
  12. data/lib/cli.rb +11 -0
  13. data/lib/configuration.rb +28 -0
  14. data/lib/converters/content_types_structure_creator.rb +58 -0
  15. data/lib/converters/contentful_model_to_json.rb +119 -0
  16. data/lib/converters/markup_converter.rb +35 -0
  17. data/lib/migrator.rb +30 -0
  18. data/lib/version.rb +3 -0
  19. data/lib/wordpress/blog.rb +83 -0
  20. data/lib/wordpress/category.rb +54 -0
  21. data/lib/wordpress/export.rb +30 -0
  22. data/lib/wordpress/post.rb +94 -0
  23. data/lib/wordpress/post_attachment.rb +44 -0
  24. data/lib/wordpress/post_category_domain.rb +71 -0
  25. data/lib/wordpress/tag.rb +56 -0
  26. data/spec/fixtures/blog/assets/attachment_post/attachment_post_15.json +5 -0
  27. data/spec/fixtures/blog/assets/attachment_post/attachment_post_21.json +5 -0
  28. data/spec/fixtures/blog/entries/blog/blog_1.json +62 -0
  29. data/spec/fixtures/blog/entries/category/category_11599757.json +5 -0
  30. data/spec/fixtures/blog/entries/category/category_14786.json +5 -0
  31. data/spec/fixtures/blog/entries/category/category_2214351.json +5 -0
  32. data/spec/fixtures/blog/entries/category/category_8076.json +5 -0
  33. data/spec/fixtures/blog/entries/post/post_1.json +7 -0
  34. data/spec/fixtures/blog/entries/post/post_11.json +31 -0
  35. data/spec/fixtures/blog/entries/post/post_15.json +11 -0
  36. data/spec/fixtures/blog/entries/post/post_21.json +11 -0
  37. data/spec/fixtures/blog/entries/post/post_3.json +13 -0
  38. data/spec/fixtures/blog/entries/post/post_5.json +13 -0
  39. data/spec/fixtures/blog/entries/post/post_9.json +13 -0
  40. data/spec/fixtures/blog/entries/tag/tag_2656354.json +5 -0
  41. data/spec/fixtures/blog/entries/tag/tag_306830130.json +5 -0
  42. data/spec/fixtures/default_contentful_structure.json +78 -0
  43. data/spec/fixtures/wordpress.xml +551 -0
  44. data/spec/lib/configuration_spec.rb +23 -0
  45. data/spec/lib/converters/markup_converter_spec.rb +27 -0
  46. data/spec/lib/wordpress/blog_spec.rb +64 -0
  47. data/spec/lib/wordpress/category_spec.rb +39 -0
  48. data/spec/lib/wordpress/export_spec.rb +27 -0
  49. data/spec/lib/wordpress/post_category_domain_spec.rb +41 -0
  50. data/spec/lib/wordpress/post_spec.rb +41 -0
  51. data/spec/lib/wordpress/tag_spec.rb +39 -0
  52. data/spec/spec_helper.rb +13 -0
  53. data/spec/support/db_rows_json.rb +5 -0
  54. data/spec/support/shared_configuration.rb +13 -0
  55. data/wordpress_exporter.gemspec +33 -0
  56. data/wordpress_settings/contentful_model.json +288 -0
  57. data/wordpress_settings/contentful_structure.json +78 -0
  58. data/wordpress_settings/default_contentful_structure.json +78 -0
  59. data/wordpress_settings/export_wordpress.xml +380 -0
  60. data/wordpress_settings/wordpress.xml +570 -0
  61. data/wordpress_settings/wordpress_settings.yml +13 -0
  62. metadata +288 -0
@@ -0,0 +1,35 @@
1
+ require 'reverse_markdown'
2
+
3
+ module Contentful
4
+ module Converter
5
# Rewrites exported post JSON files in place, replacing each post's
# 'content' value with the output of ReverseMarkdown.convert.
class MarkupConverter
  # Evaluated when the class body is loaded; sets the process-wide default
  # external encoding.
  Encoding.default_external = 'utf-8'

  attr_reader :config, :logger

  # @param config [#entries_dir] settings object; only `entries_dir` is read here
  def initialize(config)
    @logger = Logger.new(STDOUT)
    @config = config
  end

  # Converts every file matched by "<entries_dir>/post/*", logging each path
  # before it is processed.
  def convert_markup_to_markdown
    Dir.glob("#{config.entries_dir}/post/*") do |json_path|
      logger.info("Converting #{json_path} markups...")
      convert_post_content(json_path)
    end
  end

  # Parses one post file as JSON, converts its 'content' value and writes the
  # result back to the same path.
  #
  # @param post_file_path [String] path of the JSON file to rewrite
  def convert_post_content(post_file_path)
    post = JSON.parse(File.read(post_file_path))
    markdown = ReverseMarkdown.convert(post['content'])
    overwrite_file(post_file_path, post.merge!('content' => markdown))
  end

  # Replaces the file at `path` with the pretty-printed JSON form of `data`.
  #
  # @param path [String]
  # @param data [Object] anything JSON.pretty_generate accepts
  def overwrite_file(path, data)
    File.open(path, 'w') { |out| out.write(JSON.pretty_generate(data)) }
  end
end
34
+ end
35
+ end
data/lib/migrator.rb ADDED
@@ -0,0 +1,30 @@
1
+ require_relative 'configuration'
2
+ require_relative 'wordpress/export'
3
+ require_relative 'converters/contentful_model_to_json'
4
+ require_relative 'converters/markup_converter'
5
+
6
# Entry point that maps a command-line style action flag onto the exporter
# and converter objects built from the given settings.
class Migrator
  attr_reader :exporter, :settings, :converter, :markup_converter

  # @param settings [Object] raw settings handed to Contentful::Configuration.new
  def initialize(settings)
    @settings = Contentful::Configuration.new(settings)
    @exporter = Contentful::Exporter::Wordpress::Export.new(@settings)
    @converter = Contentful::Converter::ContentfulModelToJson.new(@settings)
    @markup_converter = Contentful::Converter::MarkupConverter.new(@settings)
  end

  # Runs the step selected by `action`; an unrecognised action does nothing
  # and returns nil.
  #
  # @param action [#to_s] one of the flag strings matched below
  # @param opts [Hash] only :omit_content_model is read, and only for
  #   '--extract-to-json'
  def run(action, opts = {})
    case action.to_s
    when '--extract-to-json' then extract_to_json(opts)
    when '--convert-content-model-to-json' then converter.convert_to_import_form
    when '--create-contentful-model-from-json' then converter.create_content_type_json
    when '--convert-markup' then markup_converter.convert_markup_to_markdown
    end
  end

  private

  # Exports the blog, then creates the content type JSON unless the
  # :omit_content_model option is present.
  # NOTE(review): `present?` is not core Ruby; presumably an extension loaded
  # elsewhere in the project provides it - confirm.
  def extract_to_json(opts)
    exporter.export_blog
    omit_flag = opts[:omit_content_model].present?
    return if omit_flag

    converter.create_content_type_json(omit_flag)
  end
end
data/lib/version.rb ADDED
@@ -0,0 +1,3 @@
1
# Holds the gem's version string.
module Version
  # Frozen so the shared constant cannot be mutated in place by a caller.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,83 @@
1
+ require 'time'
2
+ require 'logger'
3
+
4
+ module Contentful
5
+ module Exporter
6
+ module Wordpress
7
# Extracts blog-level data from the parsed export document and writes it as
# JSON. Also provides the file, directory, logging and link helpers that the
# other extractor classes call.
class Blog
  attr_reader :xml, :settings

  # @param xml_document [Object] parsed export document, queried via at_xpath
  # @param settings [Object] read for data_dir and entries_dir in this class
  def initialize(xml_document, settings)
    @xml = xml_document
    @settings = settings
  end

  # Ensures the data directory exists, then extracts and writes the blog entry.
  def blog_extractor
    create_directory(settings.data_dir)
    extract_blog
  end

  # Reduces every hash in `entries` (in place) to its :id key plus
  # type: 'Entry'.
  #
  # @param entries [Array<Hash>]
  # @return [Array<Hash>] the same array that was passed in
  def link_entry(entries)
    entries.each do |entry|
      entry.keep_if { |key, _value| key == :id }
      entry[:type] = 'Entry'
    end
  end

  # Reduces `asset` (in place) to its :id key plus type: 'File'.
  #
  # @param asset [Hash]
  # @return [Hash] the same hash that was passed in
  def link_asset(asset)
    asset.keep_if { |key, _value| key == :id }
    asset[:type] = 'File'
    asset
  end

  # Creates `path`, including missing parents, unless it is already a
  # directory.
  def create_directory(path)
    return if File.directory?(path)

    FileUtils.mkdir_p(path)
  end

  # Writes `data` to `path` as pretty-printed JSON, replacing existing content.
  def write_json_to_file(path, data)
    File.open(path, 'w') { |out| out.write(JSON.pretty_generate(data)) }
  end

  # @return [Logger] a new logger writing to STDOUT on every call
  def output_logger
    Logger.new(STDOUT)
  end

  private

  # Logs progress, ensures the blog entry directory exists and writes
  # blog_1.json into it.
  def extract_blog
    output_logger.info('Extracting blog data...')
    blog_dir = "#{settings.entries_dir}/blog"
    create_directory(blog_dir)
    write_json_to_file("#{settings.entries_dir}/blog/blog_1.json", extracted_data)
  end

  # Builds the blog entry. Posts, categories and tags are extracted (which
  # also writes their own files) and then reduced to links.
  def extracted_data
    {
      id: 'blog_id',
      title: title,
      posts: link_entry(posts),
      categories: link_entry(categories),
      tags: link_entry(tags)
    }
  end

  def posts
    post_source = Post.new(xml, settings)
    post_source.post_extractor
  end

  def categories
    category_source = Category.new(xml, settings)
    category_source.categories_extractor
  end

  def tags
    tag_source = Tag.new(xml, settings)
    tag_source.tags_extractor
  end

  # Text of the first node matching '//title' in the document.
  def title
    title_node = xml.at_xpath('//title')
    title_node.text
  end
end
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,54 @@
1
+ require_relative 'blog'
2
+
3
+ module Contentful
4
+ module Exporter
5
+ module Wordpress
6
# Extracts the blog-level categories from the export document and writes one
# JSON file per category.
class Category < Blog
  # @param xml [Object] parsed export document, queried via xpath
  # @param settings [Object] read for entries_dir in this class
  def initialize(xml, settings)
    @xml = xml
    @settings = settings
  end

  # Logs progress, ensures the category directory exists and extracts all
  # categories.
  #
  # @return [Array<Hash>] one hash per category with :id, :nicename and :name
  def categories_extractor
    output_logger.info 'Extracting blog categories...'
    create_directory("#{settings.entries_dir}/category")
    extract_categories
  end

  private

  # Normalizes each category node, writes it to
  # "<entries_dir>/category/<id>.json" and collects the normalized hashes.
  def extract_categories
    categories.map do |category_node|
      extracted_category(category_node).tap do |entry|
        write_json_to_file("#{settings.entries_dir}/category/#{id(category_node)}.json", entry)
      end
    end
  end

  def extracted_category(category)
    category_id = id(category)
    slug = nice_name(category)
    { id: category_id, nicename: slug, name: name(category) }
  end

  # All nodes matching '//wp:category' in the document.
  def categories
    xml.xpath('//wp:category').to_a
  end

  # 'category_' followed by the text of the node's wp:term_id.
  def id(category)
    term_id = category.xpath('wp:term_id').text
    "category_#{term_id}"
  end

  def nice_name(category)
    category.xpath('wp:category_nicename').text
  end

  def name(category)
    category.xpath('wp:cat_name').text
  end
end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,30 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'fileutils'
4
+ require 'json'
5
+
6
+ require_relative 'blog'
7
+ require_relative 'post'
8
+ require_relative 'category'
9
+ require_relative 'tag'
10
+ require_relative 'post_category_domain'
11
+ require_relative 'post_attachment'
12
+
13
+ module Contentful
14
+ module Exporter
15
+ module Wordpress
16
# Loads the WordPress export file named in the settings and starts the blog
# extraction.
class Export
  attr_reader :wordpress_xml, :settings

  # Parses the file at `settings.wordpress_xml` with Nokogiri::XML.
  #
  # The file is opened in block form so the handle is closed as soon as
  # parsing returns, instead of staying open until it is garbage collected.
  #
  # @param settings [Object] must respond to `wordpress_xml` with a file path
  def initialize(settings)
    @settings = settings
    @wordpress_xml = File.open(settings.wordpress_xml) { |file| Nokogiri::XML(file) }
  end

  # Runs the blog extraction on the parsed document.
  def export_blog
    Blog.new(wordpress_xml, settings).blog_extractor
  end
end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,94 @@
1
+ require_relative 'blog'
2
+
3
+ module Contentful
4
+ module Exporter
5
+ module Wordpress
6
# Extracts every '//item' node of the export document as a post entry and
# writes one JSON file per post.
class Post < Blog
  attr_reader :xml, :settings

  # @param xml [Object] parsed export document, queried via xpath
  # @param settings [Object] read for entries_dir in this class
  def initialize(xml, settings)
    @xml = xml
    @settings = settings
  end

  # Logs progress, ensures the post directory exists and extracts all posts.
  #
  # @return [Array<Hash>] the normalized post entries
  def post_extractor
    output_logger.info 'Extracting posts...'
    create_directory("#{settings.entries_dir}/post")
    extract_posts
  end

  # 'post_' followed by the text of the node's wp:post_id.
  def post_id(post)
    wp_id = post.xpath('wp:post_id').text
    "post_#{wp_id}"
  end

  private

  # Normalizes each item, writes it to "<entries_dir>/post/<post_id>.json"
  # and collects the normalized hashes.
  def extract_posts
    posts.map do |item|
      extract_data(item).tap do |entry|
        write_json_to_file("#{settings.entries_dir}/post/#{post_id(item)}.json", entry)
      end
    end
  end

  def posts
    xml.xpath('//item').to_a
  end

  # Basic fields first, then the optional attachment/tag/category links.
  def extract_data(xml_post)
    basic_post_data(xml_post).tap do |entry|
      assign_content_elements_to_post(xml_post, entry)
    end
  end

  def attachment(xml_post)
    PostAttachment.new(xml_post, settings).attachment_extractor
  end

  def tags(xml_post)
    PostCategoryDomain.new(xml, xml_post, settings).extract_tags
  end

  def categories(xml_post)
    PostCategoryDomain.new(xml, xml_post, settings).extract_categories
  end

  # The creation date is parsed from wp:post_date by Date.strptime with its
  # default format, before any other field is read.
  def basic_post_data(xml_post)
    created_on = Date.strptime(created_at(xml_post))
    {
      id: post_id(xml_post),
      title: title(xml_post),
      wordpress_url: url(xml_post),
      content: content(xml_post),
      created_at: created_on
    }
  end

  # Adds :attachment, :tags and :categories to `post_entry`, each only when
  # the post has one. Locals are named apart from the same-named methods.
  def assign_content_elements_to_post(xml_post, post_entry)
    asset = attachment(xml_post)
    linked_tags = link_entry(tags(xml_post))
    linked_categories = link_entry(categories(xml_post))
    post_entry[:attachment] = link_asset(asset) unless asset.nil?
    post_entry[:tags] = linked_tags unless linked_tags.empty?
    post_entry.merge!(categories: linked_categories) unless linked_categories.empty?
  end

  def title(xml_post)
    xml_post.xpath('title').text
  end

  def url(xml_post)
    xml_post.xpath('link').text
  end

  def content(xml_post)
    xml_post.xpath('content:encoded').text
  end

  def created_at(xml_post)
    xml_post.xpath('wp:post_date').text
  end
end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,44 @@
1
+ require_relative 'post'
2
+
3
+ module Contentful
4
+ module Exporter
5
+ module Wordpress
6
# Extracts the attachment of a single post node and writes it as an asset
# JSON file.
class PostAttachment < Post
  attr_reader :post, :settings

  # @param post [Object] a single post node, queried via xpath/at_xpath
  # @param settings [Object] read for assets_dir in this class
  def initialize(post, settings)
    @post = post
    @settings = settings
  end

  # Ensures the attachment asset directory exists and, when the post has a
  # wp:attachment_url, writes the asset to
  # "<assets_dir>/attachment_post/<id>.json".
  #
  # @return [Hash, nil] the asset hash (:id, :description, :url), or nil when
  #   the post has no attachment URL
  def attachment_extractor
    create_directory("#{settings.assets_dir}/attachment_post")
    asset = { id: attachment_id, description: attachment_description, url: attachment_url }
    return if asset[:url].nil?

    write_json_to_file("#{settings.assets_dir}/attachment_post/#{asset[:id]}.json", asset)
    asset
  end

  private

  # Text of wp:attachment_url, or nil when the node is absent.
  def attachment_url
    url_node = post.at_xpath('wp:attachment_url')
    url_node.text unless url_node.nil?
  end

  def attachment_id
    "attachment_#{post_id(post)}"
  end

  # Value of the first wp:postmeta whose key is '_wp_attachment_image_alt'.
  #
  # Uses `find` rather than `each` with an early return: `each` evaluates to
  # its receiver, so a post with meta entries but no alt text would yield the
  # whole list of meta nodes as the description instead of nil.
  #
  # @return [String, nil]
  def attachment_description
    alt_meta = post.xpath('wp:postmeta').to_a.find do |meta|
      meta.at_xpath('wp:meta_key').text == '_wp_attachment_image_alt'
    end
    alt_meta.at_xpath('wp:meta_value').text unless alt_meta.nil?
  end
end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,71 @@
1
+ require_relative 'post'
2
+
3
+ module Contentful
4
+ module Exporter
5
+ module Wordpress
6
# Resolves the tags and categories attached to a single post into the ids of
# the matching blog-level wp:tag / wp:category definitions.
class PostCategoryDomain < Post
  attr_reader :post, :xml, :settings

  # @param xml [Object] whole export document, queried via xpath
  # @param post [Object] a single post node, queried via css
  # @param settings [Object] stored; not read in this class
  def initialize(xml, post, settings)
    @xml = xml
    @post = post
    @settings = settings
  end

  # @return [Array<Hash>] one `{ id: 'tag_<term_id>' }` per post tag that
  #   matches a blog-level wp:tag by name; unmatched tags are skipped
  def extract_tags
    output_logger.info 'Extracting post tags...'
    post_domains('category[domain=post_tag]').each_with_object([]) do |tag, tags|
      normalized_tag = normalized_data(tag, '//wp:tag')
      tags << normalized_tag unless normalized_tag.empty?
    end
  end

  # @return [Array<Hash>] one `{ id: 'category_<term_id>' }` per post category
  #   that matches a blog-level wp:category by name; unmatched ones are skipped
  def extract_categories
    output_logger.info 'Extracting post categories...'
    post_domains('category[domain=category]').each_with_object([]) do |category, categories|
      normalized_categories = normalized_data(category, '//wp:category')
      categories << normalized_categories unless normalized_categories.empty?
    end
  end

  private

  def post_domains(domain)
    post.css(domain).to_a
  end

  def blog_domains(domain)
    xml.xpath(domain).to_a
  end

  def id(domain, prefix)
    "#{prefix}#{domain.xpath('wp:term_id').text}"
  end

  def name(domain, name_path)
    domain.xpath(name_path).text
  end

  # Finds the blog-level definition whose name equals the post domain's text
  # and returns its prefixed term id.
  #
  # Uses `find` rather than `each` with an early return: `each` evaluates to
  # its receiver, so an unmatched domain would yield the whole list of
  # blog-level nodes as its "id" instead of nil.
  #
  # @return [String, nil] nil when no blog-level definition matches
  def domain_id(domain, domain_path)
    id_prefix = prefix_id(domain_path)
    name_path = domain_path_name(domain_path)
    wanted_name = domain.text
    match = blog_domains(domain_path).find do |blog_domain|
      name(blog_domain, name_path) == wanted_name
    end
    id(match, id_prefix) unless match.nil?
  end

  # @return [Hash] `{ id: <id> }`, or an empty hash when the domain has no
  #   blog-level match, so the callers' `empty?` guard can skip it
  def normalized_data(domain, path)
    resolved_id = domain_id(domain, path)
    resolved_id.nil? ? {} : { id: resolved_id }
  end

  def prefix_id(domain_path)
    domain_path == '//wp:category' ? 'category_' : 'tag_'
  end

  def domain_path_name(domain_path)
    domain_path == '//wp:category' ? 'wp:cat_name' : 'wp:tag_name'
  end
end
69
+ end
70
+ end
71
+ end