refinerycms-wordpress-import 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -3,9 +3,17 @@ source "http://rubygems.org"
3
3
  gem "rails", "3.0.7"
4
4
  #gem "capybara", ">= 1.0.0.beta1"
5
5
  gem "sqlite3"
6
+ gem "rmagick"
6
7
 
7
- gem "rspec-rails", ">= 2.6.0"
8
- gem "database_cleaner"
8
+ group :development, :test do
9
+ gem "rspec-rails", ">= 2.6.0"
10
+ gem "database_cleaner"
11
+ gem 'guard-rspec'
12
+ gem 'ffi'
13
+ gem 'guard-bundler'
14
+ gem 'libnotify' if RUBY_PLATFORM =~ /linux/i
15
+ gem 'fakeweb'
16
+ end
9
17
 
10
18
  # To use debugger (ruby-debug for Ruby 1.8.7+, ruby-debug19 for Ruby 1.9.2+)
11
19
  # gem 'ruby-debug'
@@ -4,10 +4,7 @@ This little project is an importer for WordPress XML dumps into refinerycms(-blog
4
4
 
5
5
  You can find the source code on github: https://github.com/mremolt/refinerycms-wordpress-import
6
6
 
7
- Keep in mind, this gem imports blog posts and pages, NOT the media files, as they are not
8
- part of the XML dump! You have to manually readd them to Refinery.
9
-
10
- The same goes for links to other pages on your site. WordPress exports them just as <a>-Tags.
7
+ Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as <a>-Tags.
11
8
  If your site (blog) structure uses new urls, the links WILL break! For example, if you used
12
9
  the popular WP blog url structure "YYYY-MM/slug", be warned that Refinery just uses "blog/slug".
13
10
  So your inner site links will point to the old WP url.
@@ -84,6 +81,39 @@ If you want to clean the tables and import in one task:
84
81
 
85
82
  rake wordpress:reset_and_import_pages[file_name]
86
83
 
84
+ Finally, if you want to reset and import all data including media (see below):
85
+
86
+ rake wordpress:full_import[file_name]
87
+
88
+
89
+ == Importing media files
90
+
91
+ The WP XML dump contains absolute links to media files linked inside posts, like:
92
+
93
+ www.mysite.com/wordpress/wp-content/uploads/2011/05/cv.txt
94
+
95
+ The dump does NOT contain the files themselves! To get them imported, this gem downloads the files
96
+ from the given URL and imports them to refinery. So for a working media import the old site with
97
+ the media URLs must still be online.
98
+
99
+ After importing the files, this gem replaces the old links in pages and blog posts with the
100
+ newly generated ones. It parses all existing records searching for the right pattern. That
101
+ means you have to import pages and posts FIRST to get the URLs replaced.
102
+
103
+ Now to the rake tasks for media import:
104
+
105
+ rake wordpress:reset_media
106
+
107
+ This task deletes all data from the media tables (images and resources), ensuring a clean import.
108
+
109
+ rake wordpress:import_and_replace_media[file_name]
110
+
111
+ This task imports all the WordPress media into Refinery. After the import it parses all
112
+ pages and blog posts, replacing the legacy links with the current refinery ones.
113
+
114
+ If you want to clean the tables and import in one task:
115
+
116
+ rake wordpress:reset_import_and_replace_media[file_name]
87
117
 
88
118
  == Usage on ZSH
89
119
 
@@ -48,7 +48,7 @@ namespace :wordpress do
48
48
  end
49
49
  end
50
50
 
51
- desc "import cms data from a Refinery::WordPress XML dump"
51
+ desc "import cms data from a WordPress XML dump"
52
52
  task :import_pages, :file_name do |task, params|
53
53
  Rake::Task["environment"].invoke
54
54
  dump = Refinery::WordPress::Dump.new(params[:file_name])
@@ -72,10 +72,50 @@ namespace :wordpress do
72
72
  ENV.delete("MODEL")
73
73
  end
74
74
 
75
- desc "reset cms tables and then import cms data from a Refinery::WordPress XML dump"
75
+ desc "reset cms tables and then import cms data from a WordPress XML dump"
76
76
  task :reset_and_import_pages, :file_name do |task, params|
77
77
  Rake::Task["environment"].invoke
78
78
  Rake::Task["wordpress:reset_pages"].invoke
79
79
  Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
80
80
  end
81
+
82
+
83
+ desc "Reset the media relevant tables for a clean import"
84
+ task :reset_media do
85
+ Rake::Task["environment"].invoke
86
+
87
+ %w(images resources).each do |table_name|
88
+ p "Truncating #{table_name} ..."
89
+ ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
90
+ end
91
+ end
92
+
93
+ desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts"
94
+ task :import_and_replace_media, :file_name do |task, params|
95
+ Rake::Task["environment"].invoke
96
+ dump = Refinery::WordPress::Dump.new(params[:file_name])
97
+
98
+ attachments = dump.attachments.each(&:to_refinery)
99
+
100
+ # parse all created BlogPost and Page bodies and replace the old WordPress media URLs
101
+ # with the newly created ones
102
+ attachments.each do |attachment|
103
+ attachment.replace_url
104
+ end
105
+ end
106
+
107
+ desc "reset media tables and then import media data from a WordPress XML dump"
108
+ task :reset_import_and_replace_media, :file_name do |task, params|
109
+ Rake::Task["environment"].invoke
110
+ Rake::Task["wordpress:reset_media"].invoke
111
+ Rake::Task["wordpress:import_and_replace_media"].invoke(params[:file_name])
112
+ end
113
+
114
+ desc "reset and import all data (see the other tasks)"
115
+ task :full_import, :file_name do |task, params|
116
+ Rake::Task["environment"].invoke
117
+ Rake::Task["wordpress:reset_and_import_blog"].invoke(params[:file_name])
118
+ Rake::Task["wordpress:reset_and_import_pages"].invoke(params[:file_name])
119
+ Rake::Task["wordpress:reset_import_and_replace_media"].invoke(params[:file_name])
120
+ end
81
121
  end
@@ -1,15 +1,16 @@
1
+ require 'nokogiri'
2
+ require "wordpress/railtie"
3
+
1
4
  module Refinery
2
5
  module WordPress
6
+ autoload :Author, 'wordpress/author'
7
+ autoload :Tag, 'wordpress/tag'
8
+ autoload :Category, 'wordpress/category'
9
+ autoload :Page, 'wordpress/page'
10
+ autoload :Post, 'wordpress/post'
11
+ autoload :Comment, 'wordpress/comment'
12
+ autoload :Dump, 'wordpress/dump'
13
+ autoload :Attachment, 'wordpress/attachment'
3
14
  end
4
15
  end
5
16
 
6
- require 'nokogiri'
7
- require 'wordpress/author'
8
- require 'wordpress/tag'
9
- require 'wordpress/category'
10
- require 'wordpress/page'
11
- require 'wordpress/post'
12
- require 'wordpress/comment'
13
- require 'wordpress/dump'
14
-
15
- require "wordpress/railtie"
@@ -0,0 +1,130 @@
1
+ module Refinery
2
+ module WordPress
3
+ class Attachment
4
+ attr_reader :node
5
+ attr_reader :refinery_image
6
+ attr_reader :refinery_resource
7
+
8
+ def initialize(node)
9
+ @node = node
10
+ end
11
+
12
+ def title
13
+ node.xpath("title").text
14
+ end
15
+
16
+ def description
17
+ node.xpath("description").text
18
+ end
19
+
20
+ def file_name
21
+ url.split('/').last
22
+ end
23
+
24
+ def post_date
25
+ DateTime.parse node.xpath("wp:post_date").text
26
+ end
27
+
28
+ def url
29
+ node.xpath("wp:attachment_url").text
30
+ end
31
+
32
+ def url_pattern
33
+ url_parts = url.split('.')
34
+ extension = url_parts.pop
35
+ url_without_extension = url_parts.join('.')
36
+
37
+ /#{url_without_extension}(-\d+x\d+)?\.#{extension}/
38
+ end
39
+
40
+ def image?
41
+ url.match /\.(png|jpg|jpeg|gif)$/
42
+ end
43
+
44
+ def to_refinery
45
+ if image?
46
+ to_image
47
+ else
48
+ to_resource
49
+ end
50
+ end
51
+
52
+ def replace_url
53
+ if image?
54
+ replace_image_url
55
+ else
56
+ replace_resource_url
57
+ end
58
+ end
59
+
60
+ private
61
+
62
+ def to_image
63
+ image = ::Image.new
64
+ image.created_at = post_date
65
+ image.image_url = url
66
+ image.save!
67
+
68
+ @refinery_image = image
69
+ image
70
+ end
71
+
72
+ def to_resource
73
+ resource = ::Resource.new
74
+ resource.created_at = post_date
75
+ resource.file_url = url
76
+ resource.save!
77
+
78
+ @refinery_resource = resource
79
+ resource
80
+ end
81
+
82
+ def replace_image_url
83
+ replace_image_url_in_blog_posts
84
+ replace_image_url_in_pages
85
+ end
86
+
87
+ def replace_resource_url
88
+ replace_resource_url_in_blog_posts
89
+ replace_resource_url_in_pages
90
+ end
91
+
92
+ def replace_image_url_in_blog_posts
93
+ replace_url_in_blog_posts(refinery_image.image.url)
94
+ end
95
+
96
+ def replace_image_url_in_pages
97
+ replace_url_in_pages(refinery_image.image.url)
98
+ end
99
+
100
+ def replace_resource_url_in_blog_posts
101
+ replace_url_in_blog_posts(refinery_resource.file.url)
102
+ end
103
+
104
+ def replace_resource_url_in_pages
105
+ replace_url_in_pages(refinery_resource.file.url)
106
+ end
107
+
108
+ def replace_url_in_blog_posts(new_url)
109
+ ::BlogPost.all.each do |post|
110
+ if (! post.body.empty?) && post.body.include?(url)
111
+ post.body = post.body.gsub(url_pattern, new_url)
112
+ post.save!
113
+ end
114
+ end
115
+ end
116
+
117
+ def replace_url_in_pages(new_url)
118
+ ::Page.all.each do |page|
119
+ page.parts.each do |part|
120
+ if (! part.body.to_s.blank?) && part.body.include?(url)
121
+ part.body = part.body.gsub(url_pattern, new_url)
122
+ part.save!
123
+ end
124
+ end
125
+ end
126
+ end
127
+
128
+ end
129
+ end
130
+ end
@@ -47,6 +47,12 @@ module Refinery
47
47
  Category.new(category.text)
48
48
  end
49
49
  end
50
+
51
+ def attachments
52
+ doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment|
53
+ Attachment.new(attachment)
54
+ end
55
+ end
50
56
  end
51
57
  end
52
58
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: refinerycms-wordpress-import
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-06-05 00:00:00.000000000 +02:00
12
+ date: 2011-06-13 00:00:00.000000000 +02:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler
17
- requirement: &14624860 !ruby/object:Gem::Requirement
17
+ requirement: &17167700 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ~>
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: '1.0'
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *14624860
25
+ version_requirements: *17167700
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: refinerycms
28
- requirement: &14624380 !ruby/object:Gem::Requirement
28
+ requirement: &17167080 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ~>
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 1.0.0
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *14624380
36
+ version_requirements: *17167080
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: refinerycms-blog
39
- requirement: &14623920 !ruby/object:Gem::Requirement
39
+ requirement: &17166340 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ~>
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: 1.5.2
45
45
  type: :runtime
46
46
  prerelease: false
47
- version_requirements: *14623920
47
+ version_requirements: *17166340
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: nokogiri
50
- requirement: &14623460 !ruby/object:Gem::Requirement
50
+ requirement: &17165720 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ~>
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 1.4.4
56
56
  type: :runtime
57
57
  prerelease: false
58
- version_requirements: *14623460
58
+ version_requirements: *17165720
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: rspec-rails
61
- requirement: &14623080 !ruby/object:Gem::Requirement
61
+ requirement: &17142300 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ! '>='
@@ -66,10 +66,10 @@ dependencies:
66
66
  version: '0'
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *14623080
69
+ version_requirements: *17142300
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: database_cleaner
72
- requirement: &14622620 !ruby/object:Gem::Requirement
72
+ requirement: &17141620 !ruby/object:Gem::Requirement
73
73
  none: false
74
74
  requirements:
75
75
  - - ! '>='
@@ -77,7 +77,7 @@ dependencies:
77
77
  version: '0'
78
78
  type: :development
79
79
  prerelease: false
80
- version_requirements: *14622620
80
+ version_requirements: *17141620
81
81
  description: This gem imports a WordPress XML dump into refinerycms (Page, User) and
82
82
  refinerycms-blog (BlogPost, BlogCategory, Tag, BlogComment)
83
83
  email: marc.remolt@googlemail.com
@@ -85,17 +85,18 @@ executables: []
85
85
  extensions: []
86
86
  extra_rdoc_files: []
87
87
  files:
88
- - lib/wordpress.rb
89
- - lib/wordpress/page.rb
90
- - lib/wordpress/author.rb
88
+ - lib/refinerycms-wordpress-import.rb
89
+ - lib/tasks/wordpress.rake
91
90
  - lib/wordpress/comment.rb
91
+ - lib/wordpress/attachment.rb
92
+ - lib/wordpress/author.rb
93
+ - lib/wordpress/dump.rb
94
+ - lib/wordpress/page.rb
95
+ - lib/wordpress/category.rb
92
96
  - lib/wordpress/post.rb
93
97
  - lib/wordpress/railtie.rb
94
- - lib/wordpress/category.rb
95
98
  - lib/wordpress/tag.rb
96
- - lib/wordpress/dump.rb
97
- - lib/tasks/wordpress.rake
98
- - lib/refinerycms-wordpress-import.rb
99
+ - lib/wordpress.rb
99
100
  - MIT-LICENSE
100
101
  - Rakefile
101
102
  - Gemfile