refinerycms-wordpress-import 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -3,9 +3,17 @@ source "http://rubygems.org"
3
3
  gem "rails", "3.0.7"
4
4
  #gem "capybara", ">= 1.0.0.beta1"
5
5
  gem "sqlite3"
6
+ gem "rmagick"
6
7
 
7
- gem "rspec-rails", ">= 2.6.0"
8
- gem "database_cleaner"
8
+ group :development, :test do
9
+ gem "rspec-rails", ">= 2.6.0"
10
+ gem "database_cleaner"
11
+ gem 'guard-rspec'
12
+ gem 'ffi'
13
+ gem 'guard-bundler'
14
+ gem 'libnotify' if RUBY_PLATFORM =~ /linux/i
15
+ gem 'fakeweb'
16
+ end
9
17
 
10
18
  # To use debugger (ruby-debug for Ruby 1.8.7+, ruby-debug19 for Ruby 1.9.2+)
11
19
  # gem 'ruby-debug'
@@ -4,10 +4,7 @@ This little project is an importer for WordPress XML dumps into refinerycms(-blog
4
4
 
5
5
  You can find the source code on github: https://github.com/mremolt/refinerycms-wordpress-import
6
6
 
7
- Keep in mind, this gem imports blog posts and pages, NOT the media files, as they are not
8
- part of the XML dump! You have to manually readd them to Refinery.
9
-
10
- The same goes for links to other pages on your site. WordPress exports them just as <a>-Tags.
7
+ Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as <a>-Tags.
11
8
  If your site (blog) structure uses new urls, the links WILL break! For example, if you used
12
9
  the popular WP blog url structure "YYYY-MM/slug", be warned that Refinery just uses "blog/slug".
13
10
  So your inner site links will point to the old WP url.
@@ -84,6 +81,39 @@ If you want to clean the tables and import in one task:
84
81
 
85
82
  rake wordpress:reset_and_import_pages[file_name]
86
83
 
84
+ Finally, if you want to reset and import all data including media (see below):
85
+
86
+ rake wordpress:full_import[file_name]
87
+
88
+
89
+ == Importing media files
90
+
91
+ The WP XML dump contains absolute links to media files linked inside posts, like:
92
+
93
+ www.mysite.com/wordpress/wp-content/uploads/2011/05/cv.txt
94
+
95
+ The dump does NOT contain the files themselves! To get them imported, this gem downloads the files
96
+ from the given URL and imports them to refinery. So for a working media import the old site with
97
+ the media URLs must still be online.
98
+
99
+ After importing the files, this gem replaces the old links in pages and blog posts with the
100
+ newly generated ones. It parses all existing records searching for the right pattern. That
101
+ means you have to import pages and posts FIRST to get the URLs replaced.
102
+
103
+ Now to the rake tasks for media import:
104
+
105
+ rake wordpress:reset_media
106
+
107
+ This task deletes all data from the media tables (images and resources), ensuring a clean import.
108
+
109
+ rake wordpress:import_and_replace_media[file_name]
110
+
111
+ This task imports all the WordPress media into Refinery. After the import it parses all
112
+ pages and blog posts, replacing the legacy links with the current refinery ones.
113
+
114
+ If you want to clean the tables and import in one task:
115
+
116
+ rake wordpress:reset_import_and_replace_media[file_name]
87
117
 
88
118
  == Usage on ZSH
89
119
 
@@ -48,7 +48,7 @@ namespace :wordpress do
48
48
  end
49
49
  end
50
50
 
51
- desc "import cms data from a Refinery::WordPress XML dump"
51
+ desc "import cms data from a WordPress XML dump"
52
52
  task :import_pages, :file_name do |task, params|
53
53
  Rake::Task["environment"].invoke
54
54
  dump = Refinery::WordPress::Dump.new(params[:file_name])
@@ -72,10 +72,50 @@ namespace :wordpress do
72
72
  ENV.delete("MODEL")
73
73
  end
74
74
 
75
- desc "reset cms tables and then import cms data from a Refinery::WordPress XML dump"
75
+ desc "reset cms tables and then import cms data from a WordPress XML dump"
76
76
  task :reset_and_import_pages, :file_name do |task, params|
77
77
  Rake::Task["environment"].invoke
78
78
  Rake::Task["wordpress:reset_pages"].invoke
79
79
  Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
80
80
  end
81
+
82
+
83
+ desc "Reset the media relevant tables for a clean import"
84
+ task :reset_media do
85
+ Rake::Task["environment"].invoke
86
+
87
+ %w(images resources).each do |table_name|
88
+ p "Truncating #{table_name} ..."
89
+ ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
90
+ end
91
+ end
92
+
93
+ desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts"
94
+ task :import_and_replace_media, :file_name do |task, params|
95
+ Rake::Task["environment"].invoke
96
+ dump = Refinery::WordPress::Dump.new(params[:file_name])
97
+
98
+ attachments = dump.attachments.each(&:to_refinery)
99
+
100
+ # parse all created BlogPost and Page bodies and replace the old WordPress media URLs
101
+ # with the newly created ones
102
+ attachments.each do |attachment|
103
+ attachment.replace_url
104
+ end
105
+ end
106
+
107
+ desc "reset media tables and then import media data from a WordPress XML dump"
108
+ task :reset_import_and_replace_media, :file_name do |task, params|
109
+ Rake::Task["environment"].invoke
110
+ Rake::Task["wordpress:reset_media"].invoke
111
+ Rake::Task["wordpress:import_and_replace_media"].invoke(params[:file_name])
112
+ end
113
+
114
+ desc "reset and import all data (see the other tasks)"
115
+ task :full_import, :file_name do |task, params|
116
+ Rake::Task["environment"].invoke
117
+ Rake::Task["wordpress:reset_and_import_blog"].invoke(params[:file_name])
118
+ Rake::Task["wordpress:reset_and_import_pages"].invoke(params[:file_name])
119
+ Rake::Task["wordpress:reset_import_and_replace_media"].invoke(params[:file_name])
120
+ end
81
121
  end
@@ -1,15 +1,16 @@
1
+ require 'nokogiri'
2
+ require "wordpress/railtie"
3
+
1
4
  module Refinery
2
5
  module WordPress
6
+ autoload :Author, 'wordpress/author'
7
+ autoload :Tag, 'wordpress/tag'
8
+ autoload :Category, 'wordpress/category'
9
+ autoload :Page, 'wordpress/page'
10
+ autoload :Post, 'wordpress/post'
11
+ autoload :Comment, 'wordpress/comment'
12
+ autoload :Dump, 'wordpress/dump'
13
+ autoload :Attachment, 'wordpress/attachment'
3
14
  end
4
15
  end
5
16
 
6
- require 'nokogiri'
7
- require 'wordpress/author'
8
- require 'wordpress/tag'
9
- require 'wordpress/category'
10
- require 'wordpress/page'
11
- require 'wordpress/post'
12
- require 'wordpress/comment'
13
- require 'wordpress/dump'
14
-
15
- require "wordpress/railtie"
@@ -0,0 +1,130 @@
1
+ module Refinery
2
+ module WordPress
3
+ class Attachment
4
+ attr_reader :node
5
+ attr_reader :refinery_image
6
+ attr_reader :refinery_resource
7
+
8
+ def initialize(node)
9
+ @node = node
10
+ end
11
+
12
+ def title
13
+ node.xpath("title").text
14
+ end
15
+
16
+ def description
17
+ node.xpath("description").text
18
+ end
19
+
20
+ def file_name
21
+ url.split('/').last
22
+ end
23
+
24
+ def post_date
25
+ DateTime.parse node.xpath("wp:post_date").text
26
+ end
27
+
28
+ def url
29
+ node.xpath("wp:attachment_url").text
30
+ end
31
+
32
+ def url_pattern
33
+ url_parts = url.split('.')
34
+ extension = url_parts.pop
35
+ url_without_extension = url_parts.join('.')
36
+
37
+ /#{url_without_extension}(-\d+x\d+)?\.#{extension}/
38
+ end
39
+
40
+ def image?
41
+ url.match /\.(png|jpg|jpeg|gif)$/
42
+ end
43
+
44
+ def to_refinery
45
+ if image?
46
+ to_image
47
+ else
48
+ to_resource
49
+ end
50
+ end
51
+
52
+ def replace_url
53
+ if image?
54
+ replace_image_url
55
+ else
56
+ replace_resource_url
57
+ end
58
+ end
59
+
60
+ private
61
+
62
+ def to_image
63
+ image = ::Image.new
64
+ image.created_at = post_date
65
+ image.image_url = url
66
+ image.save!
67
+
68
+ @refinery_image = image
69
+ image
70
+ end
71
+
72
+ def to_resource
73
+ resource = ::Resource.new
74
+ resource.created_at = post_date
75
+ resource.file_url = url
76
+ resource.save!
77
+
78
+ @refinery_resource = resource
79
+ resource
80
+ end
81
+
82
+ def replace_image_url
83
+ replace_image_url_in_blog_posts
84
+ replace_image_url_in_pages
85
+ end
86
+
87
+ def replace_resource_url
88
+ replace_resource_url_in_blog_posts
89
+ replace_resource_url_in_pages
90
+ end
91
+
92
+ def replace_image_url_in_blog_posts
93
+ replace_url_in_blog_posts(refinery_image.image.url)
94
+ end
95
+
96
+ def replace_image_url_in_pages
97
+ replace_url_in_pages(refinery_image.image.url)
98
+ end
99
+
100
+ def replace_resource_url_in_blog_posts
101
+ replace_url_in_blog_posts(refinery_resource.file.url)
102
+ end
103
+
104
+ def replace_resource_url_in_pages
105
+ replace_url_in_pages(refinery_resource.file.url)
106
+ end
107
+
108
+ def replace_url_in_blog_posts(new_url)
109
+ ::BlogPost.all.each do |post|
110
+ if (! post.body.empty?) && post.body.include?(url)
111
+ post.body = post.body.gsub(url_pattern, new_url)
112
+ post.save!
113
+ end
114
+ end
115
+ end
116
+
117
+ def replace_url_in_pages(new_url)
118
+ ::Page.all.each do |page|
119
+ page.parts.each do |part|
120
+ if (! part.body.to_s.blank?) && part.body.include?(url)
121
+ part.body = part.body.gsub(url_pattern, new_url)
122
+ part.save!
123
+ end
124
+ end
125
+ end
126
+ end
127
+
128
+ end
129
+ end
130
+ end
@@ -47,6 +47,12 @@ module Refinery
47
47
  Category.new(category.text)
48
48
  end
49
49
  end
50
+
51
+ def attachments
52
+ doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment|
53
+ Attachment.new(attachment)
54
+ end
55
+ end
50
56
  end
51
57
  end
52
58
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: refinerycms-wordpress-import
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-06-05 00:00:00.000000000 +02:00
12
+ date: 2011-06-13 00:00:00.000000000 +02:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler
17
- requirement: &14624860 !ruby/object:Gem::Requirement
17
+ requirement: &17167700 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ~>
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: '1.0'
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *14624860
25
+ version_requirements: *17167700
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: refinerycms
28
- requirement: &14624380 !ruby/object:Gem::Requirement
28
+ requirement: &17167080 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ~>
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 1.0.0
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *14624380
36
+ version_requirements: *17167080
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: refinerycms-blog
39
- requirement: &14623920 !ruby/object:Gem::Requirement
39
+ requirement: &17166340 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ~>
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: 1.5.2
45
45
  type: :runtime
46
46
  prerelease: false
47
- version_requirements: *14623920
47
+ version_requirements: *17166340
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: nokogiri
50
- requirement: &14623460 !ruby/object:Gem::Requirement
50
+ requirement: &17165720 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ~>
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 1.4.4
56
56
  type: :runtime
57
57
  prerelease: false
58
- version_requirements: *14623460
58
+ version_requirements: *17165720
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: rspec-rails
61
- requirement: &14623080 !ruby/object:Gem::Requirement
61
+ requirement: &17142300 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ! '>='
@@ -66,10 +66,10 @@ dependencies:
66
66
  version: '0'
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *14623080
69
+ version_requirements: *17142300
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: database_cleaner
72
- requirement: &14622620 !ruby/object:Gem::Requirement
72
+ requirement: &17141620 !ruby/object:Gem::Requirement
73
73
  none: false
74
74
  requirements:
75
75
  - - ! '>='
@@ -77,7 +77,7 @@ dependencies:
77
77
  version: '0'
78
78
  type: :development
79
79
  prerelease: false
80
- version_requirements: *14622620
80
+ version_requirements: *17141620
81
81
  description: This gem imports a WordPress XML dump into refinerycms (Page, User) and
82
82
  refinerycms-blog (BlogPost, BlogCategory, Tag, BlogComment)
83
83
  email: marc.remolt@googlemail.com
@@ -85,17 +85,18 @@ executables: []
85
85
  extensions: []
86
86
  extra_rdoc_files: []
87
87
  files:
88
- - lib/wordpress.rb
89
- - lib/wordpress/page.rb
90
- - lib/wordpress/author.rb
88
+ - lib/refinerycms-wordpress-import.rb
89
+ - lib/tasks/wordpress.rake
91
90
  - lib/wordpress/comment.rb
91
+ - lib/wordpress/attachment.rb
92
+ - lib/wordpress/author.rb
93
+ - lib/wordpress/dump.rb
94
+ - lib/wordpress/page.rb
95
+ - lib/wordpress/category.rb
92
96
  - lib/wordpress/post.rb
93
97
  - lib/wordpress/railtie.rb
94
- - lib/wordpress/category.rb
95
98
  - lib/wordpress/tag.rb
96
- - lib/wordpress/dump.rb
97
- - lib/tasks/wordpress.rake
98
- - lib/refinerycms-wordpress-import.rb
99
+ - lib/wordpress.rb
99
100
  - MIT-LICENSE
100
101
  - Rakefile
101
102
  - Gemfile