refinerycms-wordpress-import 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,16 @@
# Development Gemfile for the refinerycms-wordpress-import gem.
# Gems are fetched over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

gem "rails", "3.0.7"
#gem "capybara", ">= 1.0.0.beta1"
gem "sqlite3"

gem "rspec-rails", ">= 2.6.0"
gem "database_cleaner"

# To use debugger (ruby-debug for Ruby 1.8.7+, ruby-debug19 for Ruby 1.9.2+)
# gem 'ruby-debug'
# gem 'ruby-debug19'

gem 'refinerycms'
gem 'refinerycms-blog'
# The gem under development is loaded straight from this working copy.
gem 'refinerycms-wordpress-import', :path => './'
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2011 YOURNAME
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,61 @@
1
+ = Refinerycms-wordpress-import
2
+
3
+ This little project is an importer for WordPress XML dumps into refinerycms(-blog).
4
+
5
+ So far, only blog-relevant data gets imported; I'm working on the CMS pages part.
6
+
7
+ You can find the source code on github: https://github.com/mremolt/refinerycms-wordpress-import
8
+
9
+ == Prerequisites
10
+
11
+ As refinerycms-wordpress-import is an addon for RefineryCMS, it shares the prerequisites with it.
12
+ So you'll first need a running installation of refinerycms and refinerycms-blog. Make sure
13
+ the site is running, all migrations have been run, and you have created the first refinery user.
14
+
15
+ == Installation
16
+
17
+ As there is no official release out yet, just add this repository to your project's Gemfile:
18
+
19
+ gem 'refinerycms-wordpress-import', :git => 'git://github.com/mremolt/refinerycms-wordpress-import.git'
20
+
21
+ and run
22
+
23
+ bundle
24
+
25
+ == Usage
26
+
27
+ Importing the XML dump is done via 3 rake tasks:
28
+
29
+ rake wordpress:reset_blog
30
+
31
+ This one basically deletes all data from the blog-relevant tables (taggings, tags, blog_comments,
32
+ blog_categories, blog_posts, blog_categories_blog_posts).
33
+ Use this one first if you want a clean import of your old blog.
34
+
35
+ rake wordpress:import_blog[file_name]
36
+
37
+ This one does all the heavy work of parsing the dump and importing the data into refinery tables.
38
+ The parameter is the path to the dump file. A Mac user reported that the ~
39
+ didn't work in the path. I'll have a look at it, but until then, please don't use it.
40
+
41
+ If you don't want to import draft posts, you can set the ENV variable ONLY_PUBLISHED to true:
42
+
43
+
44
+ rake wordpress:import_blog[file_name] ONLY_PUBLISHED=true
45
+
46
+ The task will then skip all posts that are not published.
47
+
48
+ rake wordpress:reset_and_import_blog[file_name]
49
+
50
+ This one combines the two previous tasks.
51
+
52
+ == Feedback
53
+
54
+ This is still a very new gem. It manages to import my own blog and a standard WordPress 3.1 dump with some sample posts.
55
+ The first feedback is quite good, so it seems the gem doesn't eat the machines it is installed on.
56
+
57
+ If you want to help make it even more stable, please throw your own WP dumps against it
58
+ and see what happens. If you encounter any bugs, please file a bug report here on github.
59
+ A sample dump that breaks this gem would be really helpful in that case.
60
+
61
+ For extra karma, fork it, fix it yourself and send a pull request! ;-)
data/Rakefile ADDED
@@ -0,0 +1,25 @@
# encoding: UTF-8
require 'rubygems'

# Bundler is loaded when available; otherwise only a hint is printed and
# loading continues.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rake'
require 'rdoc/task'

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs the specs and is also the default task.
RSpec::Core::RakeTask.new(:spec)
task :default => :spec

# `rake rdoc` documents the README and every Ruby file below lib/,
# writing its output to the rdoc/ directory.
Rake::RDocTask.new(:rdoc) do |doc|
  doc.rdoc_dir = 'rdoc'
  doc.title    = 'Refinerycms-wordpress-import'
  doc.options.push('--line-numbers', '--inline-source')

  %w(README.rdoc lib/**/*.rb).each do |pattern|
    doc.rdoc_files.include(pattern)
  end
end
@@ -0,0 +1,7 @@
# File named after the gem: it declares the importer namespace and then
# loads the actual implementation from 'wordpress'.
module Refinery
  module WordPress
  end
end

require 'wordpress'
@@ -0,0 +1,38 @@
require 'wordpress'

# Rake tasks that import a WordPress XML dump into the refinerycms-blog tables.
namespace :wordpress do
  desc "Reset the blog relevant tables for a clean import"
  task :reset_blog do
    Rake::Task["environment"].invoke

    %w(taggings tags blog_comments blog_categories blog_categories_blog_posts blog_posts).each do |table_name|
      # puts (not p) so the status line is printed without surrounding quotes.
      puts "Truncating #{table_name} ..."
      ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
    end
  end

  desc "import blog data from a Refinery::WordPress XML dump"
  task :import_blog, :file_name do |_task, params|
    Rake::Task["environment"].invoke

    # Fail with a usable message instead of a TypeError from the path handling.
    file_name = params[:file_name]
    raise "No dump file given. Usage: rake wordpress:import_blog[file_name]" unless file_name

    dump = Refinery::WordPress::Dump.new(file_name)

    # Authors are imported first because posts look up their author by username.
    dump.authors.each(&:to_refinery)

    # Only the literal value 'true' enables skipping of unpublished posts.
    only_published = ENV['ONLY_PUBLISHED'] == 'true'
    dump.posts(only_published).each(&:to_refinery)

    # The slug task is expected to take its model from ENV["MODEL"]; the
    # previous value is restored afterwards, even if the task raises.
    previous_model = ENV["MODEL"]
    begin
      ENV["MODEL"] = 'BlogPost'
      Rake::Task["friendly_id:redo_slugs"].invoke
    ensure
      ENV["MODEL"] = previous_model
    end
  end

  desc "reset blog tables and then import blog data from a Refinery::WordPress XML dump"
  task :reset_and_import_blog, :file_name do |_task, params|
    Rake::Task["environment"].invoke
    Rake::Task["wordpress:reset_blog"].invoke
    Rake::Task["wordpress:import_blog"].invoke(params[:file_name])
  end
end
data/lib/wordpress.rb ADDED
@@ -0,0 +1,15 @@
# Namespace shared by all classes of the WordPress importer.
module Refinery
  module WordPress
  end
end

require 'nokogiri'

# The order is significant: Post subclasses Page, so 'page' has to be
# loaded before 'post'.
%w(author tag category page post comment dump railtie).each do |component|
  require "wordpress/#{component}"
end
@@ -0,0 +1,37 @@
module Refinery
  module WordPress
    # Wraps one author node of a WordPress XML dump and converts it into a
    # Refinery user.
    class Author
      attr_reader :author_node

      # author_node - XML node that answers #xpath for the wp:author_* children.
      def initialize(author_node)
        @author_node = author_node
      end

      # Returns the text of the wp:author_login child.
      def login
        author_node.xpath("wp:author_login").text
      end

      # Returns the text of the wp:author_email child.
      def email
        author_node.xpath("wp:author_email").text
      end

      # Two authors are equal when their logins match. Objects without a
      # login (nil, strings, ...) compare as unequal instead of raising
      # NoMethodError.
      def ==(other)
        other.respond_to?(:login) && login == other.login
      end

      def inspect
        "WordPress::Author: #{login} <#{email}>"
      end

      # Finds the user with this login and email, or builds and saves a new one.
      # Returns the user; a new user whose save failed is returned unsaved.
      #
      # SECURITY NOTE: every newly created user gets the same well-known
      # password. These passwords should be changed right after the import.
      def to_refinery
        user = User.find_or_initialize_by_username_and_email(login, email)
        unless user.persisted?
          user.password = 'password'
          user.password_confirmation = 'password'
          user.save
        end
        user
      end
    end
  end
end
@@ -0,0 +1,19 @@
module Refinery
  module WordPress
    # A category name taken from a WordPress dump.
    class Category
      attr_accessor :name

      # text - the category name.
      def initialize(text)
        @name = text
      end

      # Categories are equal when their names match. Objects without a name
      # (nil, strings, ...) compare as unequal instead of raising NoMethodError.
      def ==(other)
        other.respond_to?(:name) && name == other.name
      end

      # Returns the BlogCategory with this title, creating it when missing.
      def to_refinery
        BlogCategory.find_or_create_by_title(name)
      end
    end
  end
end
@@ -0,0 +1,48 @@
module Refinery
  module WordPress
    # Wraps one comment node of a WordPress XML dump and converts it into a
    # BlogComment.
    class Comment
      attr_reader :node

      # node - XML node that answers #xpath for the wp:comment_* children.
      def initialize(node)
        @node = node
      end

      def author
        node.xpath('wp:comment_author').text
      end

      def email
        node.xpath('wp:comment_author_email').text
      end

      def url
        node.xpath('wp:comment_author_url').text
      end

      # Returns the comment date as a DateTime. DateTime.parse raises
      # ArgumentError when the text is not a parseable date.
      def date
        DateTime.parse node.xpath("wp:comment_date").text
      end

      def content
        node.xpath('wp:comment_content').text
      end

      # True only when wp:comment_approved converts to the integer 1.
      def approved?
        node.xpath('wp:comment_approved').text.to_i == 1
      end

      # Comments are equal when email, date and content all match. Objects
      # lacking any of these readers (nil, strings, ...) compare as unequal
      # instead of raising NoMethodError.
      def ==(other)
        return false unless [:email, :date, :content].all? { |reader| other.respond_to?(reader) }

        (email == other.email) && (date == other.date) && (content == other.content)
      end

      # Builds an unsaved BlogComment. The caller assigns the post and saves.
      # Every comment that is not approved is stored with state 'rejected'.
      def to_refinery
        comment = BlogComment.new :name => author, :email => email

        comment.body = content
        comment.created_at = date
        comment.state = approved? ? 'approved' : 'rejected'
        comment
      end
    end
  end
end
@@ -0,0 +1,49 @@
module Refinery
  module WordPress
    # Parses a WordPress XML dump and exposes its authors, pages, posts,
    # tags and categories as importer objects.
    class Dump
      attr_reader :doc

      # file_name - path of the dump file; a leading "~" is expanded.
      #
      # Raises RuntimeError when the path is not a readable regular file.
      def initialize(file_name)
        # expand_path, unlike absolute_path, also resolves a leading "~".
        file_name = File.expand_path(file_name)

        raise "Given file '#{file_name}' no file or not readable." \
          unless File.file?(file_name) && File.readable?(file_name)

        # The block form closes the file handle as soon as parsing is done.
        @doc = File.open(file_name) { |file| Nokogiri::XML(file) }
      end

      # Returns an Author for every wp:author node.
      def authors
        doc.xpath("//wp:author").map { |author| Author.new(author) }
      end

      # Returns a Page for every item whose wp:post_type is 'page'.
      def pages
        doc.xpath("//item[wp:post_type = 'page']").map { |page| Page.new(page) }
      end

      # Returns a Post for every item whose wp:post_type is 'post'.
      # With only_published set, unpublished posts are left out.
      def posts(only_published=false)
        posts = doc.xpath("//item[wp:post_type = 'post']").map { |post| Post.new(post) }
        only_published ? posts.select(&:published?) : posts
      end

      # Returns a Tag for every wp:tag_slug found below a wp:tag node.
      def tags
        doc.xpath("//wp:tag/wp:tag_slug").map { |tag| Tag.new(tag.text) }
      end

      # Returns a Category for every wp:cat_name found below a wp:category node.
      def categories
        doc.xpath("//wp:category/wp:cat_name").map { |category| Category.new(category.text) }
      end
    end
  end
end
@@ -0,0 +1,91 @@
module Refinery
  module WordPress
    # Wraps one <item> node of a WordPress XML dump and converts it into a
    # Refinery page. Post inherits the readers defined here.
    class Page
      include ::ActionView::Helpers::TagHelper
      include ::ActionView::Helpers::TextHelper

      attr_reader :node

      # node - XML node that answers #xpath for the item's children.
      def initialize(node)
        @node = node
      end

      def inspect
        "WordPress::Page(#{post_id}): #{title}"
      end

      def title
        node.xpath("title").text
      end

      # Returns the raw text of the content:encoded child.
      def content
        node.xpath("content:encoded").text
      end

      # Returns the content converted to HTML for Refinery.
      def content_formatted
        # WordPress doesn't export <p>-Tags, so let's run a simple_format over
        # the content. As we trust ourselves, no sanitize.
        formatted = simple_format(content, {}, { :sanitize => false })

        # Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/):
        # In WordPress you can (via a plugin) enclose code in [lang][/lang]
        # blocks, which are converted to a <pre>-tag with a class corresponding
        # to the language.
        #
        # Example:
        # [ruby]p "Hello World"[/ruby]
        # -> <pre class="brush: ruby">p "Hello world"</pre>
        #
        # Note that any bracketed word matches, not only language names.
        formatted.gsub!(/\[(\w+)\]/, '<pre class="brush: \1">')
        formatted.gsub!(/\[\/\w+\]/, '</pre>')

        # remove all tags inside <pre> that simple_format created
        # TODO: replace simple_format with a method, that ignores pre-tags
        formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match|
          "#{$1}#{strip_tags($2)}#{$3}"
        end

        formatted
      end

      def creator
        node.xpath("dc:creator").text
      end

      # Returns the post date as a DateTime. DateTime.parse raises
      # ArgumentError when the text is not a parseable date.
      def post_date
        DateTime.parse node.xpath("wp:post_date").text
      end

      # Returns wp:post_id as an Integer (0 when the text is not numeric).
      def post_id
        node.xpath("wp:post_id").text.to_i
      end

      # Returns wp:post_parent as an Integer (0 when the text is not numeric).
      def parent_id
        node.xpath("wp:post_parent").text.to_i
      end

      def status
        node.xpath("wp:status").text
      end

      # Everything that is not in status 'publish' counts as a draft.
      def draft?
        status != 'publish'
      end

      def published?
        ! draft?
      end

      # Pages are equal when their post ids match. Objects without a post_id
      # (nil, strings, ...) compare as unequal instead of raising NoMethodError.
      def ==(other)
        other.respond_to?(:post_id) && post_id == other.post_id
      end

      # Creates the Refinery page together with a 'Body' part holding the
      # formatted content. Raises when the page itself cannot be created.
      #
      # NOTE(review): parent_id is passed through as found in the dump, i.e.
      # presumably the WordPress id of the parent - verify that it maps to the
      # intended Refinery page.
      def to_refinery
        page = ::Page.create!(:title => title, :created_at => post_date,
          :draft => draft?, :parent_id => parent_id)

        page.parts.create(:title => 'Body', :body => content_formatted)
        page
      end
    end
  end
end
@@ -0,0 +1,66 @@
module Refinery
  module WordPress
    # A blog post of a WordPress XML dump. Adds tags, categories and
    # comments to the readers inherited from Page.
    class Post < Page
      # Returns a Tag for every tag category node of the post.
      def tags
        # xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0
        path = if node.xpath("category[@domain='post_tag']").count > 0
          "category[@domain='post_tag']"
        else
          "category[@domain='tag']"
        end

        node.xpath(path).map { |tag_node| Tag.new(tag_node.text) }
      end

      # Returns the tag names joined with commas.
      def tag_list
        tags.map(&:name).join(',')
      end

      # Returns a Category for every category node of the post.
      def categories
        node.xpath("category[@domain='category']").map { |cat| Category.new(cat.text) }
      end

      # Returns a Comment for every wp:comment node of the post.
      def comments
        node.xpath("wp:comment").map { |comment_node| Comment.new(comment_node) }
      end

      # Creates the BlogPost with its categories and comments and returns it.
      #
      # The author is looked up by username, falling back to the first user.
      # Raises RuntimeError when no user exists at all.
      def to_refinery
        user = ::User.find_by_username(creator) || ::User.first
        raise "Referenced User doesn't exist! Make sure the authors are imported first." \
          unless user

        post = ::BlogPost.new :title => title, :body => content_formatted,
          :draft => draft?, :published_at => post_date, :created_at => post_date,
          :author => user, :tag_list => tag_list

        begin
          post.save!
        rescue ActiveRecord::RecordInvalid
          # if the title has already been taken (WP allows duplicates here,
          # refinery doesn't) append the post_id to it, making it unique
          post.title = "#{title}-#{post_id}"
          post.save
        end

        # Categories and comments are attached after either save succeeded,
        # so a post that needed the renamed title keeps them as well.
        import_associations(post) if post.persisted?

        post
      end

      private

      # Attaches the categories and comments of the dump to the saved post.
      def import_associations(post)
        ::BlogPost.transaction do
          categories.each do |category|
            post.categories << category.to_refinery
          end

          comments.each do |comment|
            blog_comment = comment.to_refinery
            blog_comment.post = post
            blog_comment.save
          end
        end
      end
    end
  end
end
@@ -0,0 +1,10 @@
module Refinery
  module WordPress
    # Registers the gem's rake tasks with the Rails application by loading
    # tasks/wordpress.rake inside the rake_tasks hook.
    class Railtie < Rails::Railtie
      rake_tasks { load "tasks/wordpress.rake" }
    end
  end
end
@@ -0,0 +1,20 @@
module Refinery
  module WordPress
    # A tag name taken from a WordPress dump.
    class Tag
      attr_accessor :name

      # text - the tag name.
      def initialize(text)
        @name = text
      end

      # Tags are equal when their names match. Objects without a name
      # (nil, strings, ...) compare as unequal instead of raising NoMethodError.
      def ==(other)
        other.respond_to?(:name) && name == other.name
      end

      # Returns the ActsAsTaggableOn tag with this name, creating it when missing.
      def to_refinery
        ::ActsAsTaggableOn::Tag.find_or_create_by_name(name)
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,135 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: refinerycms-wordpress-import
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.1.0
6
+ platform: ruby
7
+ authors:
8
+ - Marc Remolt
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-06-03 00:00:00 +02:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: bundler
18
+ prerelease: false
19
+ requirement: &id001 !ruby/object:Gem::Requirement
20
+ none: false
21
+ requirements:
22
+ - - ~>
23
+ - !ruby/object:Gem::Version
24
+ version: "1.0"
25
+ type: :runtime
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
28
+ name: refinerycms
29
+ prerelease: false
30
+ requirement: &id002 !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ~>
34
+ - !ruby/object:Gem::Version
35
+ version: 1.0.0
36
+ type: :runtime
37
+ version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: refinerycms-blog
40
+ prerelease: false
41
+ requirement: &id003 !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ~>
45
+ - !ruby/object:Gem::Version
46
+ version: 1.5.2
47
+ type: :runtime
48
+ version_requirements: *id003
49
+ - !ruby/object:Gem::Dependency
50
+ name: nokogiri
51
+ prerelease: false
52
+ requirement: &id004 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ~>
56
+ - !ruby/object:Gem::Version
57
+ version: 1.4.4
58
+ type: :runtime
59
+ version_requirements: *id004
60
+ - !ruby/object:Gem::Dependency
61
+ name: rspec-rails
62
+ prerelease: false
63
+ requirement: &id005 !ruby/object:Gem::Requirement
64
+ none: false
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: "0"
69
+ type: :development
70
+ version_requirements: *id005
71
+ - !ruby/object:Gem::Dependency
72
+ name: database_cleaner
73
+ prerelease: false
74
+ requirement: &id006 !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: "0"
80
+ type: :development
81
+ version_requirements: *id006
82
+ description: This gem imports a WordPress XML dump into refinerycms (Page [soon], User) and refinerycms-blog (BlogPost, BlogCategory, Tag, BlogComment)
83
+ email: marc.remolt@googlemail.com
84
+ executables: []
85
+
86
+ extensions: []
87
+
88
+ extra_rdoc_files: []
89
+
90
+ files:
91
+ - lib/wordpress.rb
92
+ - lib/wordpress/page.rb
93
+ - lib/wordpress/author.rb
94
+ - lib/wordpress/comment.rb
95
+ - lib/wordpress/post.rb
96
+ - lib/wordpress/railtie.rb
97
+ - lib/wordpress/category.rb
98
+ - lib/wordpress/tag.rb
99
+ - lib/wordpress/dump.rb
100
+ - lib/tasks/wordpress.rake
101
+ - lib/refinerycms-wordpress-import.rb
102
+ - MIT-LICENSE
103
+ - Rakefile
104
+ - Gemfile
105
+ - README.rdoc
106
+ has_rdoc: true
107
+ homepage: https://github.com/mremolt/refinerycms-wordpress-import
108
+ licenses: []
109
+
110
+ post_install_message:
111
+ rdoc_options: []
112
+
113
+ require_paths:
114
+ - lib
115
+ required_ruby_version: !ruby/object:Gem::Requirement
116
+ none: false
117
+ requirements:
118
+ - - ">="
119
+ - !ruby/object:Gem::Version
120
+ version: "0"
121
+ required_rubygems_version: !ruby/object:Gem::Requirement
122
+ none: false
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: "0"
127
+ requirements: []
128
+
129
+ rubyforge_project:
130
+ rubygems_version: 1.6.2
131
+ signing_key:
132
+ specification_version: 3
133
+ summary: Import WordPress XML dumps into refinerycms(-blog).
134
+ test_files: []
135
+