refinerycms-wordpress-import 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +10 -2
- data/README.rdoc +34 -4
- data/lib/tasks/wordpress.rake +42 -2
- data/lib/wordpress.rb +11 -10
- data/lib/wordpress/attachment.rb +130 -0
- data/lib/wordpress/dump.rb +6 -0
- metadata +22 -21
data/Gemfile
CHANGED
@@ -3,9 +3,17 @@ source "http://rubygems.org"
|
|
3
3
|
gem "rails", "3.0.7"
|
4
4
|
#gem "capybara", ">= 1.0.0.beta1"
|
5
5
|
gem "sqlite3"
|
6
|
+
gem "rmagick"
|
6
7
|
|
7
|
-
|
8
|
-
gem "
|
8
|
+
group :development, :test do
|
9
|
+
gem "rspec-rails", ">= 2.6.0"
|
10
|
+
gem "database_cleaner"
|
11
|
+
gem 'guard-rspec'
|
12
|
+
gem 'ffi'
|
13
|
+
gem 'guard-bundler'
|
14
|
+
gem 'libnotify' if RUBY_PLATFORM =~ /linux/i
|
15
|
+
gem 'fakeweb'
|
16
|
+
end
|
9
17
|
|
10
18
|
# To use debugger (ruby-debug for Ruby 1.8.7+, ruby-debug19 for Ruby 1.9.2+)
|
11
19
|
# gem 'ruby-debug'
|
data/README.rdoc
CHANGED
@@ -4,10 +4,7 @@ This litte project is an importer for WordPress XML dumps into refinerycms(-blog
|
|
4
4
|
|
5
5
|
You can find the source code on github: https://github.com/mremolt/refinerycms-wordpress-import
|
6
6
|
|
7
|
-
Keep in mind
|
8
|
-
part of the XML dump! You have to manually readd them to Refinery.
|
9
|
-
|
10
|
-
The same goes for links to other pages on your site. WordPress exports them just as <a>-Tags.
|
7
|
+
Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as <a>-Tags.
|
11
8
|
If your site (blog) structure uses new urls, the links WILL break! For example, if you used
|
12
9
|
the popular WP blog url structure "YYYY-MM/slug", be warned that Refinery just uses "blog/slug".
|
13
10
|
So your inner site links will point to the old WP url.
|
@@ -84,6 +81,39 @@ If you want to clean the tables and import in one task:
|
|
84
81
|
|
85
82
|
rake wordpress:reset_and_import_pages[file_name]
|
86
83
|
|
84
|
+
Finally, if you want to reset and import all data including media (see below):
|
85
|
+
|
86
|
+
rake wordpress:full_import[file_name]
|
87
|
+
|
88
|
+
|
89
|
+
== Importing media files
|
90
|
+
|
91
|
+
The WP XML dump contains absolute links to media files linked inside posts, like:
|
92
|
+
|
93
|
+
www.mysite.com/wordpress/wp-content/uploads/2011/05/cv.txt
|
94
|
+
|
95
|
+
The dump does NOT contain the files themselves! To get them imported, this gem downloads the files
|
96
|
+
from the given URL and imports them to refinery. So for a working media import the old site with
|
97
|
+
the media URLs must still be online.
|
98
|
+
|
99
|
+
After importing the files, this gem replaces the old links in pages and blog posts with the
|
100
|
+
new generated ones. It parses all existing records searching for the right pattern. That
|
101
|
+
means you have to import pages and posts FIRST to get the URLs replaced.
|
102
|
+
|
103
|
+
Now to the rake tasks for media import:
|
104
|
+
|
105
|
+
rake wordpress:reset_media
|
106
|
+
|
107
|
+
This task deletes all data from the media tables (images and resources), ensuring a clean import.
|
108
|
+
|
109
|
+
rake wordpress:import_and_replace_media[file_name]
|
110
|
+
|
111
|
+
This task imports all the WordPress media into Refinery. After the import it parses all
|
112
|
+
pages and blog posts, replacing the legacy links with the current refinery ones.
|
113
|
+
|
114
|
+
If you want to clean the tables and import in one task:
|
115
|
+
|
116
|
+
rake wordpress:reset_import_and_replace_media[file_name]
|
87
117
|
|
88
118
|
== Usage on ZSH
|
89
119
|
|
data/lib/tasks/wordpress.rake
CHANGED
@@ -48,7 +48,7 @@ namespace :wordpress do
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
desc "import cms data from a
|
51
|
+
desc "import cms data from a WordPress XML dump"
|
52
52
|
task :import_pages, :file_name do |task, params|
|
53
53
|
Rake::Task["environment"].invoke
|
54
54
|
dump = Refinery::WordPress::Dump.new(params[:file_name])
|
@@ -72,10 +72,50 @@ namespace :wordpress do
|
|
72
72
|
ENV.delete("MODEL")
|
73
73
|
end
|
74
74
|
|
75
|
-
desc "reset cms tables and then import cms data from a
|
75
|
+
desc "reset cms tables and then import cms data from a WordPress XML dump"
task :reset_and_import_pages, :file_name do |_task, args|
  # Invoke the "environment" task first, then wipe the cms tables, then
  # re-import them from the dump file given as the task argument.
  Rake::Task["environment"].invoke
  Rake::Task["wordpress:reset_pages"].invoke

  import_task = Rake::Task["wordpress:import_pages"]
  import_task.invoke(args[:file_name])
end
|
81
|
+
|
82
|
+
|
83
|
+
desc "Reset the media relevant tables for a clean import"
task :reset_media do
  Rake::Task["environment"].invoke

  # NOTE: the progress message says "Truncating", but the rows are removed
  # with a plain DELETE statement, one table at a time.
  ["images", "resources"].each do |media_table|
    p "Truncating #{media_table} ..."
    ActiveRecord::Base.connection.execute "DELETE FROM #{media_table}"
  end
end
|
92
|
+
|
93
|
+
desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts"
task :import_and_replace_media, :file_name do |_task, args|
  Rake::Task["environment"].invoke
  wordpress_dump = Refinery::WordPress::Dump.new(args[:file_name])

  # First pass: convert every attachment of the dump via #to_refinery.
  imported = wordpress_dump.attachments
  imported.each { |attachment| attachment.to_refinery }

  # Second pass: go through the bodies of all created BlogPosts and Pages and
  # replace the old WordPress media URLs with the newly created ones.
  imported.each { |attachment| attachment.replace_url }
end
|
106
|
+
|
107
|
+
desc "reset media tables and then import media data from a WordPress XML dump"
task :reset_import_and_replace_media, :file_name do |_task, args|
  # Invoke the "environment" task, clear the media tables, then run the media
  # import (including URL replacement) for the given dump file.
  Rake::Task["environment"].invoke
  Rake::Task["wordpress:reset_media"].invoke

  media_import = Rake::Task["wordpress:import_and_replace_media"]
  media_import.invoke(args[:file_name])
end
|
113
|
+
|
114
|
+
desc "reset and import all data (see the other tasks)"
task :full_import, :file_name do |_task, args|
  Rake::Task["environment"].invoke

  # Order matters: blog posts and pages are imported before the media step,
  # which rewrites media URLs inside the records that already exist.
  %w(reset_and_import_blog reset_and_import_pages reset_import_and_replace_media).each do |step|
    Rake::Task["wordpress:#{step}"].invoke(args[:file_name])
  end
end
|
81
121
|
end
|
data/lib/wordpress.rb
CHANGED
@@ -1,15 +1,16 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require "wordpress/railtie"
|
3
|
+
|
1
4
|
module Refinery
|
2
5
|
module WordPress
|
6
|
+
autoload :Author, 'wordpress/author'
|
7
|
+
autoload :Tag, 'wordpress/tag'
|
8
|
+
autoload :Category, 'wordpress/category'
|
9
|
+
autoload :Page, 'wordpress/page'
|
10
|
+
autoload :Post, 'wordpress/post'
|
11
|
+
autoload :Comment, 'wordpress/comment'
|
12
|
+
autoload :Dump, 'wordpress/dump'
|
13
|
+
autoload :Attachment, 'wordpress/attachment'
|
3
14
|
end
|
4
15
|
end
|
5
16
|
|
6
|
-
require 'nokogiri'
|
7
|
-
require 'wordpress/author'
|
8
|
-
require 'wordpress/tag'
|
9
|
-
require 'wordpress/category'
|
10
|
-
require 'wordpress/page'
|
11
|
-
require 'wordpress/post'
|
12
|
-
require 'wordpress/comment'
|
13
|
-
require 'wordpress/dump'
|
14
|
-
|
15
|
-
require "wordpress/railtie"
|
@@ -0,0 +1,130 @@
|
|
1
|
+
module Refinery
  module WordPress
    # Wraps one attachment node of a WordPress XML dump.
    #
    # An attachment can be converted into a Refinery record (#to_refinery:
    # ::Image for image files, ::Resource for everything else) and can then
    # rewrite the old WordPress URLs inside blog posts and page parts so they
    # point at the newly created record (#replace_url).
    #
    # The node only has to respond to +xpath(query)+ returning an object that
    # responds to +text+ (presumably a Nokogiri node - confirm against Dump).
    class Attachment
      # URLs ending in one of these extensions (any letter case) are imported
      # as images; everything else is imported as a resource.
      IMAGE_EXTENSION_PATTERN = /\.(png|jpg|jpeg|gif)\z/i

      attr_reader :node
      attr_reader :refinery_image
      attr_reader :refinery_resource

      # node - the XML node describing the attachment.
      def initialize(node)
        @node = node
      end

      # Returns the text of the node's "title" element.
      def title
        node.xpath("title").text
      end

      # Returns the text of the node's "description" element.
      def description
        node.xpath("description").text
      end

      # Returns the last path segment of the attachment URL.
      def file_name
        url.split('/').last
      end

      # Returns the "wp:post_date" element parsed as a DateTime.
      # Raises ArgumentError if the text cannot be parsed as a date.
      def post_date
        DateTime.parse node.xpath("wp:post_date").text
      end

      # Returns the text of the node's "wp:attachment_url" element.
      def url
        node.xpath("wp:attachment_url").text
      end

      # Returns a Regexp matching the attachment URL itself as well as
      # variants carrying a "-<width>x<height>" suffix right before the file
      # extension (e.g. ".../pic-300x200.jpg").
      #
      # The URL is escaped so that characters such as "." or "+" are matched
      # literally instead of being interpreted as regex metacharacters.
      def url_pattern
        base, dot, extension = url.rpartition('.')

        # No extension at all: only the literal URL can be matched.
        return /#{Regexp.escape(url)}/ if dot.empty?

        /#{Regexp.escape(base)}(-\d+x\d+)?\.#{Regexp.escape(extension)}/
      end

      # Returns true when the URL ends in a known image extension
      # (case-insensitive), false otherwise.
      def image?
        !!(url =~ IMAGE_EXTENSION_PATTERN)
      end

      # Creates and saves the matching Refinery record and returns it:
      # an ::Image for images, a ::Resource otherwise.
      def to_refinery
        if image?
          to_image
        else
          to_resource
        end
      end

      # Replaces every occurrence of the old WordPress URL (including resized
      # variants) in all blog posts and page parts with the URL of the record
      # created by #to_refinery. #to_refinery must have been called before.
      def replace_url
        # An empty URL would produce a pattern that matches everywhere.
        return if url.empty?

        if image?
          replace_image_url
        else
          replace_resource_url
        end
      end

      private

      # Builds and saves an ::Image from the attachment URL and remembers it
      # in @refinery_image.
      def to_image
        image = ::Image.new
        image.created_at = post_date
        image.image_url = url
        image.save!

        @refinery_image = image
        image
      end

      # Builds and saves a ::Resource from the attachment URL and remembers
      # it in @refinery_resource.
      def to_resource
        resource = ::Resource.new
        resource.created_at = post_date
        resource.file_url = url
        resource.save!

        @refinery_resource = resource
        resource
      end

      def replace_image_url
        replace_image_url_in_blog_posts
        replace_image_url_in_pages
      end

      def replace_resource_url
        replace_resource_url_in_blog_posts
        replace_resource_url_in_pages
      end

      def replace_image_url_in_blog_posts
        replace_url_in_blog_posts(refinery_image.image.url)
      end

      def replace_image_url_in_pages
        replace_url_in_pages(refinery_image.image.url)
      end

      def replace_resource_url_in_blog_posts
        replace_url_in_blog_posts(refinery_resource.file.url)
      end

      def replace_resource_url_in_pages
        replace_url_in_pages(refinery_resource.file.url)
      end

      # Rewrites the body of every ::BlogPost that references this attachment.
      def replace_url_in_blog_posts(new_url)
        ::BlogPost.all.each do |post|
          replace_url_in_record(post, new_url)
        end
      end

      # Rewrites the body of every part of every ::Page that references this
      # attachment.
      def replace_url_in_pages(new_url)
        ::Page.all.each do |page|
          page.parts.each do |part|
            replace_url_in_record(part, new_url)
          end
        end
      end

      # Replaces all matches of #url_pattern in record.body with new_url and
      # saves the record. Records whose body is nil, empty or does not match
      # are left untouched and are not saved.
      def replace_url_in_record(record, new_url)
        pattern = url_pattern
        body = record.body.to_s

        # Test against the pattern rather than the plain URL so that bodies
        # containing only a resized variant are rewritten as well.
        return unless body =~ pattern

        # Block form: new_url is inserted verbatim, without backslash
        # sequences being interpreted as back-references.
        record.body = body.gsub(pattern) { new_url }
        record.save!
      end

    end
  end
end
|
data/lib/wordpress/dump.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: refinerycms-wordpress-import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-06-
|
12
|
+
date: 2011-06-13 00:00:00.000000000 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|
17
|
-
requirement: &
|
17
|
+
requirement: &17167700 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: '1.0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *17167700
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: refinerycms
|
28
|
-
requirement: &
|
28
|
+
requirement: &17167080 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 1.0.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *17167080
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: refinerycms-blog
|
39
|
-
requirement: &
|
39
|
+
requirement: &17166340 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ~>
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 1.5.2
|
45
45
|
type: :runtime
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *17166340
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: nokogiri
|
50
|
-
requirement: &
|
50
|
+
requirement: &17165720 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ~>
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 1.4.4
|
56
56
|
type: :runtime
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *17165720
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: rspec-rails
|
61
|
-
requirement: &
|
61
|
+
requirement: &17142300 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ! '>='
|
@@ -66,10 +66,10 @@ dependencies:
|
|
66
66
|
version: '0'
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *17142300
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: database_cleaner
|
72
|
-
requirement: &
|
72
|
+
requirement: &17141620 !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
74
74
|
requirements:
|
75
75
|
- - ! '>='
|
@@ -77,7 +77,7 @@ dependencies:
|
|
77
77
|
version: '0'
|
78
78
|
type: :development
|
79
79
|
prerelease: false
|
80
|
-
version_requirements: *
|
80
|
+
version_requirements: *17141620
|
81
81
|
description: This gem imports a WordPress XML dump into refinerycms (Page, User) and
|
82
82
|
refinerycms-blog (BlogPost, BlogCategory, Tag, BlogComment)
|
83
83
|
email: marc.remolt@googlemail.com
|
@@ -85,17 +85,18 @@ executables: []
|
|
85
85
|
extensions: []
|
86
86
|
extra_rdoc_files: []
|
87
87
|
files:
|
88
|
-
- lib/wordpress.rb
|
89
|
-
- lib/wordpress
|
90
|
-
- lib/wordpress/author.rb
|
88
|
+
- lib/refinerycms-wordpress-import.rb
|
89
|
+
- lib/tasks/wordpress.rake
|
91
90
|
- lib/wordpress/comment.rb
|
91
|
+
- lib/wordpress/attachment.rb
|
92
|
+
- lib/wordpress/author.rb
|
93
|
+
- lib/wordpress/dump.rb
|
94
|
+
- lib/wordpress/page.rb
|
95
|
+
- lib/wordpress/category.rb
|
92
96
|
- lib/wordpress/post.rb
|
93
97
|
- lib/wordpress/railtie.rb
|
94
|
-
- lib/wordpress/category.rb
|
95
98
|
- lib/wordpress/tag.rb
|
96
|
-
- lib/wordpress
|
97
|
-
- lib/tasks/wordpress.rake
|
98
|
-
- lib/refinerycms-wordpress-import.rb
|
99
|
+
- lib/wordpress.rb
|
99
100
|
- MIT-LICENSE
|
100
101
|
- Rakefile
|
101
102
|
- Gemfile
|