refinerycms-wordpress-import 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +52 -4
- data/lib/tasks/wordpress.rake +45 -2
- data/lib/wordpress/dump.rb +5 -2
- data/lib/wordpress/page.rb +34 -18
- data/lib/wordpress/post.rb +19 -0
- metadata +61 -68
data/README.rdoc
CHANGED
@@ -2,19 +2,31 @@
|
|
2
2
|
|
3
3
|
This little project is an importer for WordPress XML dumps into refinerycms(-blog).
|
4
4
|
|
5
|
-
So far, only blog-relevant data gets imported, I'm working on the cms pages part.
|
6
|
-
|
7
5
|
You can find the source code on github: https://github.com/mremolt/refinerycms-wordpress-import
|
8
6
|
|
7
|
+
Keep in mind, this gem imports blog posts and pages, NOT the media files, as they are not
|
8
|
+
part of the XML dump! You have to manually re-add them to Refinery.
|
9
|
+
|
10
|
+
The same goes for links to other pages on your site. WordPress exports them just as <a>-Tags.
|
11
|
+
If your site (blog) structure uses new urls, the links WILL break! For example, if you used
|
12
|
+
the popular WP blog url structure "YYYY-MM/slug", be warned that Refinery just uses "blog/slug".
|
13
|
+
So your inner site links will point to the old WP url.
|
14
|
+
|
15
|
+
|
9
16
|
== Prerequisites
|
10
17
|
|
11
18
|
As refinerycms-wordpress-import is an addon for RefineryCMS, it shares the prerequisites with it.
|
12
19
|
So you'll first need a running installation of refinerycms and refinerycms-blog. Make sure
|
13
20
|
the site is running, all migrations are run and you created the first refinery user.
|
14
21
|
|
22
|
+
|
15
23
|
== Installation
|
16
24
|
|
17
|
-
|
25
|
+
Just add the gem to your project's Gemfile:
|
26
|
+
|
27
|
+
gem 'refinerycms-wordpress-import'
|
28
|
+
|
29
|
+
Or if you want to stay on the bleeding edge:
|
18
30
|
|
19
31
|
gem 'refinerycms-wordpress-import', :git => 'git://github.com/mremolt/refinerycms-wordpress-import.git'
|
20
32
|
|
@@ -22,9 +34,10 @@ and run
|
|
22
34
|
|
23
35
|
bundle
|
24
36
|
|
37
|
+
|
25
38
|
== Usage
|
26
39
|
|
27
|
-
Importing the XML dump is done via
|
40
|
+
Importing the XML dump is done via rake tasks:
|
28
41
|
|
29
42
|
rake wordpress:reset_blog
|
30
43
|
|
@@ -49,6 +62,41 @@ The task will then skip all posts that are not published.
|
|
49
62
|
|
50
63
|
This one combines the two previous tasks.
|
51
64
|
|
65
|
+
If you also want to import the cms part of WordPress, three more rake tasks manage
|
66
|
+
the import into RefineryCMS Pages:
|
67
|
+
|
68
|
+
rake wordpress:reset_pages
|
69
|
+
|
70
|
+
This task deletes all data from the cms tables, ensuring a clean import. Otherwise existing
|
71
|
+
pages could break the import because of duplicate IDs.
|
72
|
+
|
73
|
+
rake wordpress:import_pages[file_name]
|
74
|
+
|
75
|
+
This task imports all the WordPress pages into Refinery. The page structure (parent - child)
|
76
|
+
is preserved.
|
77
|
+
|
78
|
+
If you want to skip the draft pages, add the ONLY_PUBLISHED parameter to this task,
|
79
|
+
just like with wordpress:import_blog.
|
80
|
+
|
81
|
+
rake wordpress:import_pages[file_name] ONLY_PUBLISHED=true
|
82
|
+
|
83
|
+
If you want to clean the tables and import in one task:
|
84
|
+
|
85
|
+
rake wordpress:reset_and_import_pages[file_name]
|
86
|
+
|
87
|
+
|
88
|
+
== Usage on ZSH
|
89
|
+
|
90
|
+
One more hint for users of zsh (like myself):
|
91
|
+
|
92
|
+
The square brackets following the rake task need to be escaped on zsh, as they have a
|
93
|
+
special meaning there. So the syntax is:
|
94
|
+
|
95
|
+
rake wordpress:reset_and_import_blog\[file_name\]
|
96
|
+
|
97
|
+
Ugly, but it works. This is the case for all rake tasks by the way, not just mine.
|
98
|
+
|
99
|
+
|
52
100
|
== Feedback
|
53
101
|
|
54
102
|
This is still a very new gem. It manages to import my own blog and a standard WordPress 3.1 dump with some sample posts.
|
data/lib/tasks/wordpress.rake
CHANGED
@@ -5,7 +5,8 @@ namespace :wordpress do
|
|
5
5
|
task :reset_blog do
|
6
6
|
Rake::Task["environment"].invoke
|
7
7
|
|
8
|
-
%w(taggings tags blog_comments blog_categories blog_categories_blog_posts
|
8
|
+
%w(taggings tags blog_comments blog_categories blog_categories_blog_posts
|
9
|
+
blog_posts).each do |table_name|
|
9
10
|
p "Truncating #{table_name} ..."
|
10
11
|
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
|
11
12
|
end
|
@@ -22,12 +23,13 @@ namespace :wordpress do
|
|
22
23
|
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
|
23
24
|
dump.posts(only_published).each(&:to_refinery)
|
24
25
|
|
26
|
+
Refinery::WordPress::Post.create_blog_page_if_necessary
|
27
|
+
|
25
28
|
ENV["MODEL"] = 'BlogPost'
|
26
29
|
Rake::Task["friendly_id:redo_slugs"].invoke
|
27
30
|
ENV.delete("MODEL")
|
28
31
|
end
|
29
32
|
|
30
|
-
|
31
33
|
desc "reset blog tables and then import blog data from a Refinery::WordPress XML dump"
|
32
34
|
task :reset_and_import_blog, :file_name do |task, params|
|
33
35
|
Rake::Task["environment"].invoke
|
@@ -35,4 +37,45 @@ namespace :wordpress do
|
|
35
37
|
Rake::Task["wordpress:import_blog"].invoke(params[:file_name])
|
36
38
|
end
|
37
39
|
|
40
|
+
|
41
|
+
desc "Reset the cms relevant tables for a clean import"
|
42
|
+
task :reset_pages do
|
43
|
+
Rake::Task["environment"].invoke
|
44
|
+
|
45
|
+
%w(page_part_translations page_translations page_parts pages).each do |table_name|
|
46
|
+
p "Truncating #{table_name} ..."
|
47
|
+
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
desc "import cms data from a Refinery::WordPress XML dump"
|
52
|
+
task :import_pages, :file_name do |task, params|
|
53
|
+
Rake::Task["environment"].invoke
|
54
|
+
dump = Refinery::WordPress::Dump.new(params[:file_name])
|
55
|
+
|
56
|
+
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
|
57
|
+
dump.pages(only_published).each(&:to_refinery)
|
58
|
+
|
59
|
+
# After all pages are persisted we can now create the parent - child
|
60
|
+
# relationships. This is necessary, as WordPress doesn't dump the pages in
|
61
|
+
# a correct order.
|
62
|
+
dump.pages(only_published).each do |dump_page|
|
63
|
+
page = ::Page.find(dump_page.post_id)
|
64
|
+
page.parent_id = dump_page.parent_id
|
65
|
+
page.save!
|
66
|
+
end
|
67
|
+
|
68
|
+
Refinery::WordPress::Post.create_blog_page_if_necessary
|
69
|
+
|
70
|
+
ENV["MODEL"] = 'Page'
|
71
|
+
Rake::Task["friendly_id:redo_slugs"].invoke
|
72
|
+
ENV.delete("MODEL")
|
73
|
+
end
|
74
|
+
|
75
|
+
desc "reset cms tables and then import cms data from a Refinery::WordPress XML dump"
|
76
|
+
task :reset_and_import_pages, :file_name do |task, params|
|
77
|
+
Rake::Task["environment"].invoke
|
78
|
+
Rake::Task["wordpress:reset_pages"].invoke
|
79
|
+
Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
|
80
|
+
end
|
38
81
|
end
|
data/lib/wordpress/dump.rb
CHANGED
@@ -19,10 +19,13 @@ module Refinery
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
-
def pages
|
23
|
-
doc.xpath("//item[wp:post_type = 'page']").collect do |page|
|
22
|
+
def pages(only_published=false)
|
23
|
+
pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page|
|
24
24
|
Page.new(page)
|
25
25
|
end
|
26
|
+
|
27
|
+
pages = pages.select(&:published?) if only_published
|
28
|
+
pages
|
26
29
|
end
|
27
30
|
|
28
31
|
def posts(only_published=false)
|
data/lib/wordpress/page.rb
CHANGED
@@ -23,23 +23,10 @@ module Refinery
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def content_formatted
|
26
|
-
|
27
|
-
# the content. As we trust ourselves, no sanatize.
|
28
|
-
formatted = simple_format(content, {}, { :sanitize => false })
|
29
|
-
|
30
|
-
# Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/):
|
31
|
-
# In WordPress you can (via a plugin) enclose code in [lang][/lang]
|
32
|
-
# blocks, which are converted to a <pre>-tag with a class corresponding
|
33
|
-
# to the language.
|
34
|
-
#
|
35
|
-
# Example:
|
36
|
-
# [ruby]p "Hello World"[/ruby]
|
37
|
-
# -> <pre class="brush: ruby">p "Hello world"</pre>
|
38
|
-
formatted.gsub!(/\[(\w+)\]/, '<pre class="brush: \1">')
|
39
|
-
formatted.gsub!(/\[\/\w+\]/, '</pre>')
|
26
|
+
formatted = format_syntax_highlighter(format_paragraphs(content))
|
40
27
|
|
41
28
|
# remove all tags inside <pre> that simple_format created
|
42
|
-
# TODO: replace
|
29
|
+
# TODO: replace format_paragraphs with a method, that ignores pre-tags
|
43
30
|
formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match|
|
44
31
|
"#{$1}#{strip_tags($2)}#{$3}"
|
45
32
|
end
|
@@ -60,7 +47,8 @@ module Refinery
|
|
60
47
|
end
|
61
48
|
|
62
49
|
def parent_id
|
63
|
-
node.xpath("wp:post_parent").text.to_i
|
50
|
+
dump_id = node.xpath("wp:post_parent").text.to_i
|
51
|
+
dump_id == 0 ? nil : dump_id
|
64
52
|
end
|
65
53
|
|
66
54
|
def status
|
@@ -80,12 +68,40 @@ module Refinery
|
|
80
68
|
end
|
81
69
|
|
82
70
|
def to_refinery
|
83
|
-
page = ::Page.create!(:
|
84
|
-
|
71
|
+
page = ::Page.create!(:id => post_id, :title => title,
|
72
|
+
:created_at => post_date, :draft => draft?)
|
85
73
|
|
86
74
|
page.parts.create(:title => 'Body', :body => content_formatted)
|
87
75
|
page
|
88
76
|
end
|
77
|
+
|
78
|
+
private
|
79
|
+
|
80
|
+
def format_paragraphs(text, html_options={})
|
81
|
+
# WordPress doesn't export <p>-Tags, so let's run a simple_format over
|
82
|
+
# the content. As we trust ourselves, no sanatize. This code is heavily
|
83
|
+
# inspired by the simple_format rails helper
|
84
|
+
text = ''.html_safe if text.nil?
|
85
|
+
start_tag = tag('p', html_options, true)
|
86
|
+
|
87
|
+
text.gsub!(/\r\n?/, "\n") # \r\n and \r -> \n
|
88
|
+
text.gsub!(/\n\n+/, "</p>\n\n#{start_tag}") # 2+ newline -> paragraph
|
89
|
+
text.insert 0, start_tag
|
90
|
+
|
91
|
+
text.html_safe.safe_concat("</p>")
|
92
|
+
end
|
93
|
+
|
94
|
+
def format_syntax_highlighter(text)
|
95
|
+
# Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/):
|
96
|
+
# In WordPress you can (via a plugin) enclose code in [lang][/lang]
|
97
|
+
# blocks, which are converted to a <pre>-tag with a class corresponding
|
98
|
+
# to the language.
|
99
|
+
#
|
100
|
+
# Example:
|
101
|
+
# [ruby]p "Hello World"[/ruby]
|
102
|
+
# -> <pre class="brush: ruby">p "Hello world"</pre>
|
103
|
+
text.gsub(/\[(\w+)\](.+?)\[\/\1\]/m, '<pre class="brush: \1">\2</pre>')
|
104
|
+
end
|
89
105
|
end
|
90
106
|
end
|
91
107
|
end
|
data/lib/wordpress/post.rb
CHANGED
@@ -61,6 +61,25 @@ module Refinery
|
|
61
61
|
|
62
62
|
post
|
63
63
|
end
|
64
|
+
|
65
|
+
def self.create_blog_page_if_necessary
|
66
|
+
# refinerycms wants a page at /blog, so let's make sure there is one
|
67
|
+
# taken from the original db seeds from refinery-blog
|
68
|
+
unless ::Page.where("link_url = ?", '/blog').exists?
|
69
|
+
page = ::Page.create(
|
70
|
+
:title => "Blog",
|
71
|
+
:link_url => "/blog",
|
72
|
+
:deletable => false,
|
73
|
+
:position => ((::Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1),
|
74
|
+
:menu_match => "^/blogs?(\/|\/.+?|)$"
|
75
|
+
)
|
76
|
+
|
77
|
+
::Page.default_parts.each do |default_page_part|
|
78
|
+
page.parts.create(:title => default_page_part, :body => nil)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
64
83
|
end
|
65
84
|
end
|
66
85
|
end
|
metadata
CHANGED
@@ -1,93 +1,90 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: refinerycms-wordpress-import
|
3
|
-
version: !ruby/object:Gem::Version
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
4
5
|
prerelease:
|
5
|
-
version: 0.1.0
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- Marc Remolt
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
|
13
|
-
date: 2011-06-03 00:00:00 +02:00
|
12
|
+
date: 2011-06-05 00:00:00.000000000 +02:00
|
14
13
|
default_executable:
|
15
|
-
dependencies:
|
16
|
-
- !ruby/object:Gem::Dependency
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
17
16
|
name: bundler
|
18
|
-
|
19
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
17
|
+
requirement: &14624860 !ruby/object:Gem::Requirement
|
20
18
|
none: false
|
21
|
-
requirements:
|
19
|
+
requirements:
|
22
20
|
- - ~>
|
23
|
-
- !ruby/object:Gem::Version
|
24
|
-
version:
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '1.0'
|
25
23
|
type: :runtime
|
26
|
-
version_requirements: *id001
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: refinerycms
|
29
24
|
prerelease: false
|
30
|
-
|
25
|
+
version_requirements: *14624860
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: refinerycms
|
28
|
+
requirement: &14624380 !ruby/object:Gem::Requirement
|
31
29
|
none: false
|
32
|
-
requirements:
|
30
|
+
requirements:
|
33
31
|
- - ~>
|
34
|
-
- !ruby/object:Gem::Version
|
32
|
+
- !ruby/object:Gem::Version
|
35
33
|
version: 1.0.0
|
36
34
|
type: :runtime
|
37
|
-
version_requirements: *id002
|
38
|
-
- !ruby/object:Gem::Dependency
|
39
|
-
name: refinerycms-blog
|
40
35
|
prerelease: false
|
41
|
-
|
36
|
+
version_requirements: *14624380
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: refinerycms-blog
|
39
|
+
requirement: &14623920 !ruby/object:Gem::Requirement
|
42
40
|
none: false
|
43
|
-
requirements:
|
41
|
+
requirements:
|
44
42
|
- - ~>
|
45
|
-
- !ruby/object:Gem::Version
|
43
|
+
- !ruby/object:Gem::Version
|
46
44
|
version: 1.5.2
|
47
45
|
type: :runtime
|
48
|
-
version_requirements: *id003
|
49
|
-
- !ruby/object:Gem::Dependency
|
50
|
-
name: nokogiri
|
51
46
|
prerelease: false
|
52
|
-
|
47
|
+
version_requirements: *14623920
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: nokogiri
|
50
|
+
requirement: &14623460 !ruby/object:Gem::Requirement
|
53
51
|
none: false
|
54
|
-
requirements:
|
52
|
+
requirements:
|
55
53
|
- - ~>
|
56
|
-
- !ruby/object:Gem::Version
|
54
|
+
- !ruby/object:Gem::Version
|
57
55
|
version: 1.4.4
|
58
56
|
type: :runtime
|
59
|
-
version_requirements: *id004
|
60
|
-
- !ruby/object:Gem::Dependency
|
61
|
-
name: rspec-rails
|
62
57
|
prerelease: false
|
63
|
-
|
58
|
+
version_requirements: *14623460
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: rspec-rails
|
61
|
+
requirement: &14623080 !ruby/object:Gem::Requirement
|
64
62
|
none: false
|
65
|
-
requirements:
|
66
|
-
- -
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version:
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
69
67
|
type: :development
|
70
|
-
version_requirements: *id005
|
71
|
-
- !ruby/object:Gem::Dependency
|
72
|
-
name: database_cleaner
|
73
68
|
prerelease: false
|
74
|
-
|
69
|
+
version_requirements: *14623080
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: database_cleaner
|
72
|
+
requirement: &14622620 !ruby/object:Gem::Requirement
|
75
73
|
none: false
|
76
|
-
requirements:
|
77
|
-
- -
|
78
|
-
- !ruby/object:Gem::Version
|
79
|
-
version:
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
80
78
|
type: :development
|
81
|
-
|
82
|
-
|
79
|
+
prerelease: false
|
80
|
+
version_requirements: *14622620
|
81
|
+
description: This gem imports a WordPress XML dump into refinerycms (Page, User) and
|
82
|
+
refinerycms-blog (BlogPost, BlogCategory, Tag, BlogComment)
|
83
83
|
email: marc.remolt@googlemail.com
|
84
84
|
executables: []
|
85
|
-
|
86
85
|
extensions: []
|
87
|
-
|
88
86
|
extra_rdoc_files: []
|
89
|
-
|
90
|
-
files:
|
87
|
+
files:
|
91
88
|
- lib/wordpress.rb
|
92
89
|
- lib/wordpress/page.rb
|
93
90
|
- lib/wordpress/author.rb
|
@@ -106,30 +103,26 @@ files:
|
|
106
103
|
has_rdoc: true
|
107
104
|
homepage: https://github.com/mremolt/refinerycms-wordpress-import
|
108
105
|
licenses: []
|
109
|
-
|
110
106
|
post_install_message:
|
111
107
|
rdoc_options: []
|
112
|
-
|
113
|
-
require_paths:
|
108
|
+
require_paths:
|
114
109
|
- lib
|
115
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
110
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
116
111
|
none: false
|
117
|
-
requirements:
|
118
|
-
- -
|
119
|
-
- !ruby/object:Gem::Version
|
120
|
-
version:
|
121
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
|
+
requirements:
|
113
|
+
- - ! '>='
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: '0'
|
116
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
122
117
|
none: false
|
123
|
-
requirements:
|
124
|
-
- -
|
125
|
-
- !ruby/object:Gem::Version
|
126
|
-
version:
|
118
|
+
requirements:
|
119
|
+
- - ! '>='
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
127
122
|
requirements: []
|
128
|
-
|
129
123
|
rubyforge_project:
|
130
124
|
rubygems_version: 1.6.2
|
131
125
|
signing_key:
|
132
126
|
specification_version: 3
|
133
127
|
summary: Import WordPress XML dumps into refinerycms(-blog).
|
134
128
|
test_files: []
|
135
|
-
|