refinerycms-wordpress-import 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +10 -2
- data/README.rdoc +34 -4
- data/lib/tasks/wordpress.rake +42 -2
- data/lib/wordpress.rb +11 -10
- data/lib/wordpress/attachment.rb +130 -0
- data/lib/wordpress/dump.rb +6 -0
- metadata +22 -21
data/Gemfile
CHANGED
@@ -3,9 +3,17 @@ source "http://rubygems.org"
|
|
3
3
|
gem "rails", "3.0.7"
|
4
4
|
#gem "capybara", ">= 1.0.0.beta1"
|
5
5
|
gem "sqlite3"
|
6
|
+
gem "rmagick"
|
6
7
|
|
7
|
-
|
8
|
-
gem "
|
8
|
+
group :development, :test do
|
9
|
+
gem "rspec-rails", ">= 2.6.0"
|
10
|
+
gem "database_cleaner"
|
11
|
+
gem 'guard-rspec'
|
12
|
+
gem 'ffi'
|
13
|
+
gem 'guard-bundler'
|
14
|
+
gem 'libnotify' if RUBY_PLATFORM =~ /linux/i
|
15
|
+
gem 'fakeweb'
|
16
|
+
end
|
9
17
|
|
10
18
|
# To use debugger (ruby-debug for Ruby 1.8.7+, ruby-debug19 for Ruby 1.9.2+)
|
11
19
|
# gem 'ruby-debug'
|
data/README.rdoc
CHANGED
@@ -4,10 +4,7 @@ This little project is an importer for WordPress XML dumps into refinerycms(-blog
|
|
4
4
|
|
5
5
|
You can find the source code on github: https://github.com/mremolt/refinerycms-wordpress-import
|
6
6
|
|
7
|
-
Keep in mind
|
8
|
-
part of the XML dump! You have to manually readd them to Refinery.
|
9
|
-
|
10
|
-
The same goes for links to other pages on your site. WordPress exports them just as <a>-Tags.
|
7
|
+
Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as <a>-Tags.
|
11
8
|
If your site (blog) structure uses new urls, the links WILL break! For example, if you used
|
12
9
|
the popular WP blog url structure "YYYY-MM/slug", be warned that Refinery just uses "blog/slug".
|
13
10
|
So your inner site links will point to the old WP url.
|
@@ -84,6 +81,39 @@ If you want to clean the tables and import in one task:
|
|
84
81
|
|
85
82
|
rake wordpress:reset_and_import_pages[file_name]
|
86
83
|
|
84
|
+
Finally, if you want to reset and import all data including media (see below):
|
85
|
+
|
86
|
+
rake wordpress:full_import[file_name]
|
87
|
+
|
88
|
+
|
89
|
+
== Importing media files
|
90
|
+
|
91
|
+
The WP XML dump contains absolute links to media files linked inside posts, like:
|
92
|
+
|
93
|
+
www.mysite.com/wordpress/wp-content/uploads/2011/05/cv.txt
|
94
|
+
|
95
|
+
The dump does NOT contain the files themselves! To get them imported, this gem downloads the files
|
96
|
+
from the given URL and imports them to refinery. So for a working media import the old site with
|
97
|
+
the media URLs must still be online.
|
98
|
+
|
99
|
+
After importing the files, this gem replaces the old links in pages and blog posts with the
|
100
|
+
new generated ones. It parses all existing records searching for the right pattern. That
|
101
|
+
means, you have to import pages and posts FIRST to get the URLs replaced.
|
102
|
+
|
103
|
+
Now to the rake tasks for media import:
|
104
|
+
|
105
|
+
rake wordpress:reset_media
|
106
|
+
|
107
|
+
This task deletes all data from the media tables (images and resources), ensuring a clean import.
|
108
|
+
|
109
|
+
rake wordpress:import_and_replace_media[file_name]
|
110
|
+
|
111
|
+
This task imports all the WordPress media into Refinery. After the import it parses all
|
112
|
+
pages and blog posts, replacing the legacy links with the current refinery ones.
|
113
|
+
|
114
|
+
If you want to clean the tables and import in one task:
|
115
|
+
|
116
|
+
rake wordpress:reset_import_and_replace_media[file_name]
|
87
117
|
|
88
118
|
== Usage on ZSH
|
89
119
|
|
data/lib/tasks/wordpress.rake
CHANGED
@@ -48,7 +48,7 @@ namespace :wordpress do
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
desc "import cms data from a
|
51
|
+
desc "import cms data from a WordPress XML dump"
|
52
52
|
task :import_pages, :file_name do |task, params|
|
53
53
|
Rake::Task["environment"].invoke
|
54
54
|
dump = Refinery::WordPress::Dump.new(params[:file_name])
|
@@ -72,10 +72,50 @@ namespace :wordpress do
|
|
72
72
|
ENV.delete("MODEL")
|
73
73
|
end
|
74
74
|
|
75
|
-
desc "reset cms tables and then import cms data from a
|
75
|
+
desc "reset cms tables and then import cms data from a WordPress XML dump"
|
76
76
|
task :reset_and_import_pages, :file_name do |task, params|
|
77
77
|
Rake::Task["environment"].invoke
|
78
78
|
Rake::Task["wordpress:reset_pages"].invoke
|
79
79
|
Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
|
80
80
|
end
|
81
|
+
|
82
|
+
|
83
|
+
# Clears the media tables (images and resources) so that a following media
# import starts from an empty state. Loads the Rails environment first because
# the deletes go through the ActiveRecord connection.
desc "Reset the media relevant tables for a clean import"
task :reset_media do
  Rake::Task["environment"].invoke

  ["images", "resources"].each do |media_table|
    p("Truncating #{media_table} ...")
    ActiveRecord::Base.connection.execute("DELETE FROM #{media_table}")
  end
end
|
92
|
+
|
93
|
+
# Imports every attachment found in the given WordPress XML dump and then
# rewrites the old WordPress media URLs in the existing content.
#
# Runs in two passes: first every attachment is imported (to_refinery), and
# only afterwards are the URLs replaced (replace_url) for each of them.
desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts"
task :import_and_replace_media, :file_name do |_task, args|
  Rake::Task["environment"].invoke
  wordpress_dump = Refinery::WordPress::Dump.new(args[:file_name])

  # Pass 1: create the Refinery record for each attachment.
  imported = wordpress_dump.attachments.each { |item| item.to_refinery }

  # Pass 2: go through the BlogPost and Page bodies and swap the old
  # WordPress media URLs for the newly created ones.
  imported.each(&:replace_url)
end
|
106
|
+
|
107
|
+
# Convenience task: wipes the media tables and then runs the media import and
# URL replacement for the given dump file.
desc "reset media tables and then import media data from a WordPress XML dump"
task :reset_import_and_replace_media, :file_name do |_task, args|
  dump_file = args[:file_name]

  Rake::Task["environment"].invoke
  Rake::Task["wordpress:reset_media"].invoke
  Rake::Task["wordpress:import_and_replace_media"].invoke(dump_file)
end
|
113
|
+
|
114
|
+
# Runs the complete reset-and-import sequence for one dump file: blog first,
# then pages, then media. Media comes last so that the imported posts and
# pages already exist when their media URLs are replaced.
desc "reset and import all data (see the other tasks)"
task :full_import, :file_name do |_task, args|
  Rake::Task["environment"].invoke

  %w(reset_and_import_blog reset_and_import_pages reset_import_and_replace_media).each do |step|
    Rake::Task["wordpress:#{step}"].invoke(args[:file_name])
  end
end
|
81
121
|
end
|
data/lib/wordpress.rb
CHANGED
@@ -1,15 +1,16 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require "wordpress/railtie"
|
3
|
+
|
1
4
|
module Refinery
|
2
5
|
module WordPress
|
6
|
+
autoload :Author, 'wordpress/author'
|
7
|
+
autoload :Tag, 'wordpress/tag'
|
8
|
+
autoload :Category, 'wordpress/category'
|
9
|
+
autoload :Page, 'wordpress/page'
|
10
|
+
autoload :Post, 'wordpress/post'
|
11
|
+
autoload :Comment, 'wordpress/comment'
|
12
|
+
autoload :Dump, 'wordpress/dump'
|
13
|
+
autoload :Attachment, 'wordpress/attachment'
|
3
14
|
end
|
4
15
|
end
|
5
16
|
|
6
|
-
require 'nokogiri'
|
7
|
-
require 'wordpress/author'
|
8
|
-
require 'wordpress/tag'
|
9
|
-
require 'wordpress/category'
|
10
|
-
require 'wordpress/page'
|
11
|
-
require 'wordpress/post'
|
12
|
-
require 'wordpress/comment'
|
13
|
-
require 'wordpress/dump'
|
14
|
-
|
15
|
-
require "wordpress/railtie"
|
@@ -0,0 +1,130 @@
|
|
1
|
+
module Refinery
  module WordPress
    # Wraps one attachment node of a WordPress XML dump.
    #
    # An attachment can be imported into Refinery (#to_refinery), which creates
    # either an ::Image or a ::Resource from the attachment URL, and afterwards
    # the old WordPress URL can be replaced by the new one in all blog posts
    # and page parts (#replace_url).
    #
    # #replace_url reads the record created by #to_refinery, so #to_refinery
    # has to be called first on the same instance.
    class Attachment
      # File extensions that are imported as images; anything else becomes a
      # resource. Case-insensitive so that uploads such as "PHOTO.JPG" count.
      IMAGE_EXTENSION = /\.(png|jpg|jpeg|gif)\z/i

      # The XML node this attachment was built from.
      attr_reader :node
      # The ::Image created by #to_refinery (nil until then / for non-images).
      attr_reader :refinery_image
      # The ::Resource created by #to_refinery (nil until then / for images).
      attr_reader :refinery_resource

      # node - an object responding to xpath(expr) whose result responds to
      #        text (a Nokogiri node in the importer).
      def initialize(node)
        @node = node
      end

      # Text of the node's "title" element.
      def title
        node.xpath("title").text
      end

      # Text of the node's "description" element.
      def description
        node.xpath("description").text
      end

      # Last path segment of the attachment URL.
      def file_name
        url.split('/').last
      end

      # The "wp:post_date" element parsed into a DateTime.
      def post_date
        DateTime.parse node.xpath("wp:post_date").text
      end

      # Text of the "wp:attachment_url" element: the old WordPress media URL.
      def url
        node.xpath("wp:attachment_url").text
      end

      # Regexp matching the attachment URL literally, with an optional
      # "-<digits>x<digits>" suffix in front of the extension (presumably the
      # size suffix WordPress adds to resized images, e.g. "pic-300x200.png").
      #
      # All URL text is escaped, so characters like "." or "+" only match
      # themselves.
      def url_pattern
        base, dot, extension = url.rpartition('.')
        # No "." at all: there is no extension to split off, match the URL as is.
        return /#{Regexp.escape(url)}/ if dot.empty?

        /#{Regexp.escape(base)}(-\d+x\d+)?\.#{Regexp.escape(extension)}/
      end

      # True when the URL ends in one of the known image extensions.
      def image?
        !!(url =~ IMAGE_EXTENSION)
      end

      # Creates and saves the matching Refinery record (::Image for images,
      # ::Resource otherwise) and returns it. Raises whatever save! raises.
      def to_refinery
        if image?
          to_image
        else
          to_resource
        end
      end

      # Replaces the old WordPress URL with the URL of the imported record in
      # all blog posts and page parts. Requires a prior #to_refinery call.
      def replace_url
        if image?
          replace_image_url
        else
          replace_resource_url
        end
      end

      private

      # Builds an ::Image from the attachment URL, keeps it in @refinery_image.
      def to_image
        image = ::Image.new
        image.created_at = post_date
        image.image_url = url
        image.save!

        @refinery_image = image
        image
      end

      # Builds a ::Resource from the attachment URL, keeps it in
      # @refinery_resource.
      def to_resource
        resource = ::Resource.new
        resource.created_at = post_date
        resource.file_url = url
        resource.save!

        @refinery_resource = resource
        resource
      end

      def replace_image_url
        replace_image_url_in_blog_posts
        replace_image_url_in_pages
      end

      def replace_resource_url
        replace_resource_url_in_blog_posts
        replace_resource_url_in_pages
      end

      def replace_image_url_in_blog_posts
        replace_url_in_blog_posts(refinery_image.image.url)
      end

      def replace_image_url_in_pages
        replace_url_in_pages(refinery_image.image.url)
      end

      def replace_resource_url_in_blog_posts
        replace_url_in_blog_posts(refinery_resource.file.url)
      end

      def replace_resource_url_in_pages
        replace_url_in_pages(refinery_resource.file.url)
      end

      # Rewrites every blog post whose body matches #url_pattern.
      #
      # The match is done against the pattern, not the plain URL, so bodies
      # that only contain a "-WxH" variant are rewritten too. Posts with a nil
      # or non-matching body are left alone and not saved.
      def replace_url_in_blog_posts(new_url)
        pattern = url_pattern

        ::BlogPost.all.each do |post|
          next unless post.body.to_s =~ pattern

          # Block form: new_url is inserted verbatim, without backslash
          # sequences being interpreted as group references.
          post.body = post.body.gsub(pattern) { new_url }
          post.save!
        end
      end

      # Same as #replace_url_in_blog_posts, applied to every part of every page.
      def replace_url_in_pages(new_url)
        pattern = url_pattern

        ::Page.all.each do |page|
          page.parts.each do |part|
            next unless part.body.to_s =~ pattern

            part.body = part.body.gsub(pattern) { new_url }
            part.save!
          end
        end
      end

    end
  end
end
|
data/lib/wordpress/dump.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: refinerycms-wordpress-import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-06-
|
12
|
+
date: 2011-06-13 00:00:00.000000000 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|
17
|
-
requirement: &
|
17
|
+
requirement: &17167700 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: '1.0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *17167700
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: refinerycms
|
28
|
-
requirement: &
|
28
|
+
requirement: &17167080 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 1.0.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *17167080
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: refinerycms-blog
|
39
|
-
requirement: &
|
39
|
+
requirement: &17166340 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ~>
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 1.5.2
|
45
45
|
type: :runtime
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *17166340
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: nokogiri
|
50
|
-
requirement: &
|
50
|
+
requirement: &17165720 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ~>
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 1.4.4
|
56
56
|
type: :runtime
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *17165720
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: rspec-rails
|
61
|
-
requirement: &
|
61
|
+
requirement: &17142300 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ! '>='
|
@@ -66,10 +66,10 @@ dependencies:
|
|
66
66
|
version: '0'
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *17142300
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: database_cleaner
|
72
|
-
requirement: &
|
72
|
+
requirement: &17141620 !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
74
74
|
requirements:
|
75
75
|
- - ! '>='
|
@@ -77,7 +77,7 @@ dependencies:
|
|
77
77
|
version: '0'
|
78
78
|
type: :development
|
79
79
|
prerelease: false
|
80
|
-
version_requirements: *
|
80
|
+
version_requirements: *17141620
|
81
81
|
description: This gem imports a WordPress XML dump into refinerycms (Page, User) and
|
82
82
|
refinerycms-blog (BlogPost, BlogCategory, Tag, BlogComment)
|
83
83
|
email: marc.remolt@googlemail.com
|
@@ -85,17 +85,18 @@ executables: []
|
|
85
85
|
extensions: []
|
86
86
|
extra_rdoc_files: []
|
87
87
|
files:
|
88
|
-
- lib/wordpress.rb
|
89
|
-
- lib/wordpress
|
90
|
-
- lib/wordpress/author.rb
|
88
|
+
- lib/refinerycms-wordpress-import.rb
|
89
|
+
- lib/tasks/wordpress.rake
|
91
90
|
- lib/wordpress/comment.rb
|
91
|
+
- lib/wordpress/attachment.rb
|
92
|
+
- lib/wordpress/author.rb
|
93
|
+
- lib/wordpress/dump.rb
|
94
|
+
- lib/wordpress/page.rb
|
95
|
+
- lib/wordpress/category.rb
|
92
96
|
- lib/wordpress/post.rb
|
93
97
|
- lib/wordpress/railtie.rb
|
94
|
-
- lib/wordpress/category.rb
|
95
98
|
- lib/wordpress/tag.rb
|
96
|
-
- lib/wordpress
|
97
|
-
- lib/tasks/wordpress.rake
|
98
|
-
- lib/refinerycms-wordpress-import.rb
|
99
|
+
- lib/wordpress.rb
|
99
100
|
- MIT-LICENSE
|
100
101
|
- Rakefile
|
101
102
|
- Gemfile
|