wpxml_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Maxim Chernyak
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,51 @@
1
+ wpxml_parser
2
+ ============
3
+
4
+ WordPress (version 3.x, and I think there's a plugin for 2.x) can export an XML file with all your blog posts, comments, etc. This tool parses that XML file and gives you convenient access to all your posts, comments, categories, etc. in a Ruby script that _you have to write_.
5
+
6
+ The cool thing is that you don't even need access to your own WordPress database; all you need to do is download the XML file from your admin panel.
7
+
8
+ ### Let me demonstrate
9
+
10
+ require 'wpxml_parser'
11
+ include WpxmlParser
12
+
13
+ # Blog
14
+ blog = Blog.new('dump_of_your_blog.xml')
15
+ blog.posts.size # => 51
16
+ post = blog.posts.first # => <Post>
17
+
18
+ # Post
19
+ post.title # => "Title of your post"
20
+ post.categories # => ["ruby", "rails"]
21
+ post.date # => <Time>
22
+ post.slug # => "title-of-your-post"
23
+ post.body # => "...shitload of text..."
24
+ post.post_id # => 27
25
+
26
+ # Comments
27
+ post.comments # => [<Comment>, <Comment>, ...]
28
+ comment = post.comments.first
29
+
30
+ comment.author # => 'dude'
31
+ comment.date # => <Time>
32
+
33
+ # Comments have parent and children
34
+ comment.parent_id # => 32
35
+ comment.parent # => <Comment>
36
+ comment.children # => [<Comment>, <Comment>]
37
+
38
+ # Conveniences
39
+ # You can quickly find a post or a comment by post_id or comment_id respectively
40
+ blog.find_post(27) # => <Post>
41
+ post.find_comment(32) # => <Comment>
42
+
43
+ # For more info and more accessible properties check out the source code, it's pretty straightforward.
44
+
45
+ ### A note on posts
46
+
47
+ Currently only published posts are included. Drafts, private posts, etc. are skipped. This was done on purpose, since I didn't really use those WordPress features.
48
+
49
+ ### A note on comments
50
+
51
+ Currently only approved comments are included. This was done on purpose, since I didn't need spam.
@@ -0,0 +1,4 @@
1
+ require 'wpxml_parser/blog'
2
+ require 'wpxml_parser/entity'
3
+ require 'wpxml_parser/comment'
4
+ require 'wpxml_parser/post'
@@ -0,0 +1,28 @@
1
+ require 'nokogiri'
2
+ require File.dirname(__FILE__) + '/post'
3
+
4
module WpxmlParser
  # Entry point for a WordPress XML export: parses the file once and exposes
  # its <item> elements as Post objects.
  class Blog
    # xml_filename - path to the export file. The file is opened, parsed with
    #                Nokogiri, and closed again before the constructor returns.
    def initialize(xml_filename)
      @xml = File.open(xml_filename) { |io| Nokogiri::XML(io) }
    end

    # Returns an Array of Post objects whose post_type is 'post' and whose
    # status is 'publish'.
    def posts
      wrapped_items.select do |entry|
        entry.post_type == 'post' && entry.status == 'publish'
      end
    end

    # Returns the published post whose post_id equals the given value,
    # or nil when there is none.
    def find_post(post_id)
      posts.find { |entry| entry.post_id == post_id }
    end

    # Returns an Array of Post objects whose post_type is 'attachment'.
    def attachments
      wrapped_items.select { |entry| entry.post_type == 'attachment' }
    end

    private

    # Wraps every <item> element of the document in a Post. A fresh Array is
    # built on each call; nothing is cached.
    def wrapped_items
      @xml.xpath('//item').map { |node| Post.new(node) }
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'time'
2
+ require File.dirname(__FILE__) + '/entity'
3
+
4
module WpxmlParser
  # A single <wp:comment> element belonging to a post.
  #
  # Field readers (comment_id, author, email, url, ip, date, date_gmt, body,
  # approved, parent_id) are not defined here; they are resolved through the
  # Entity superclass using the mapping returned by method_element_map.
  class Comment < Entity
    # entity - the XML node for this comment.
    # post   - the owning post; used to look up parent and child comments.
    def initialize(entity, post)
      super(entity)
      @post = post
    end

    # Maps reader names to the element (relative to the comment node) that
    # holds the value.
    def self.method_element_map
      { :comment_id => 'wp:comment_id',
        :author     => 'wp:comment_author',
        :email      => 'wp:comment_author_email',
        :url        => 'wp:comment_author_url',
        :ip         => 'wp:comment_author_ip',
        :date       => 'wp:comment_date',
        :date_gmt   => 'wp:comment_date_gmt',
        :body       => 'wp:comment_content',
        :approved   => 'wp:comment_approved',
        :parent_id  => 'wp:comment_parent' }
    end

    # Returns the comment on the same post whose comment_id equals this
    # comment's parent_id, or nil when no such comment is found.
    def parent
      @post.comments.find { |c| c.comment_id == parent_id }
    end

    # Returns the comments on the same post whose parent_id equals this
    # comment's comment_id (possibly an empty Array).
    def children
      @post.comments.select { |c| c.parent_id == comment_id }
    end

    # True when the approved field is exactly the string '1'.
    def approved?
      approved == '1'
    end

    # Conversion hooks for the raw element text. Each takes the String
    # content of the mapped element and returns the converted value.
    # (This method used to be defined twice with identical bodies; only one
    # definition is kept.)
    def process_comment_id(comment_id)
      comment_id.to_i
    end

    def process_parent_id(parent_id)
      parent_id.to_i
    end

    def process_date(date)
      Time.parse(date)
    end

    def process_date_gmt(date)
      Time.parse(date)
    end
  end
end
@@ -0,0 +1,19 @@
1
module WpxmlParser
  # Base class for objects backed by a single XML node.
  #
  # A subclass overrides the class method method_element_map to return a Hash
  # of reader name (Symbol) => XPath expression (String). Calling one of those
  # readers on an instance evaluates the expression against the wrapped node
  # and returns the text content of the first match. If the instance also
  # responds to a public method named process_<reader>, the raw text is passed
  # through that method and its result is returned instead.
  class Entity
    # Default mapping: no readers. Subclasses override this.
    def self.method_element_map
      {}
    end

    # entity - the node to read from; it must respond to xpath(expression)
    #          and return a collection whose elements respond to content.
    def initialize(entity)
      @entity = entity
    end

    # Resolves mapped readers; anything else falls through to the default
    # behaviour (NoMethodError).
    #
    # Returns nil when the mapped element is absent from the node, without
    # invoking the process_<reader> hook.
    def method_missing(meth, *args, &blk)
      element_map = self.class.method_element_map
      return super unless element_map.key?(meth)

      node = @entity.xpath(element_map[meth]).first
      return nil if node.nil?

      # respond_to? is used instead of searching public_methods for a String
      # name: that list holds Symbols on Ruby 1.9+, so a String lookup never
      # matches and the conversion hook would be skipped.
      hook = "process_#{meth}"
      respond_to?(hook) ? send(hook, node.content) : node.content
    end

    # Keeps respond_to? consistent with the readers method_missing handles.
    def respond_to_missing?(meth, include_private = false)
      self.class.method_element_map.key?(meth) || super
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require File.dirname(__FILE__) + '/entity'
2
+ require File.dirname(__FILE__) + '/comment'
3
+
4
module WpxmlParser
  # One <item> element of the export. Despite the name, an instance may wrap
  # any item type; post_type tells them apart.
  #
  # The readers listed in method_element_map (title, link, date, author, body,
  # post_id, slug, status, post_type) are resolved by the Entity superclass.
  class Post < Entity
    # Maps reader names to the child element that holds the value.
    def self.method_element_map
      {
        :title     => 'title',
        :link      => 'link',
        :date      => 'pubDate',
        :author    => 'dc:creator',
        :body      => 'content:encoded',
        :post_id   => 'wp:post_id',
        :slug      => 'wp:post_name',
        :status    => 'wp:status',
        :post_type => 'wp:post_type'
      }
    end

    # Returns the text of every <category> child, with duplicates removed.
    def categories
      names = @entity.xpath('category').map { |node| node.content }
      names.uniq
    end

    # Returns the approved comments of this item as Comment objects. The
    # Array is rebuilt from the XML on every call.
    def comments
      every_comment = @entity.xpath('wp:comment').map do |node|
        Comment.new(node, self)
      end
      every_comment.select { |candidate| candidate.approved? }
    end

    # Returns the approved comment whose comment_id equals the given value,
    # or nil when there is none.
    def find_comment(comment_id)
      comments.find do |candidate|
        candidate.comment_id == comment_id
      end
    end

    # Conversion hook: turns the raw pubDate text into a Time.
    def process_date(date)
      Time.parse(date)
    end

    # Conversion hook: turns the raw wp:post_id text into an Integer.
    def process_post_id(post_id)
      post_id.to_i
    end
  end
end
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wpxml_parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Maxim Chernyak
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-08-14 00:00:00 -04:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: nokogiri
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: Convenient parser that provides a clean way to interact with wordpress's XML dump file in your ruby scripts, to make it easy to migrate away from WordPress anywhere else.
36
+ email: max@bitsonnet.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files: []
42
+
43
+ files:
44
+ - lib/wpxml_parser/blog.rb
45
+ - lib/wpxml_parser/comment.rb
46
+ - lib/wpxml_parser/entity.rb
47
+ - lib/wpxml_parser/post.rb
48
+ - lib/wpxml_parser.rb
49
+ - LICENSE
50
+ - README.md
51
+ has_rdoc: true
52
+ homepage: http://github.com/maxim/wpxml_parser
53
+ licenses: []
54
+
55
+ post_install_message:
56
+ rdoc_options: []
57
+
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ hash: 3
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ hash: 19
75
+ segments:
76
+ - 1
77
+ - 3
78
+ - 4
79
+ version: 1.3.4
80
+ requirements: []
81
+
82
+ rubyforge_project: wpxml_parser
83
+ rubygems_version: 1.3.7
84
+ signing_key:
85
+ specification_version: 3
86
+ summary: Convenient WordPress XML dump parser.
87
+ test_files: []
88
+