wrxer 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/.travis.yml +9 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +120 -0
- data/Rakefile +7 -0
- data/lib/wrxer.rb +36 -0
- data/lib/wrxer/attribute.rb +7 -0
- data/lib/wrxer/author.rb +10 -0
- data/lib/wrxer/category.rb +8 -0
- data/lib/wrxer/coercion.rb +60 -0
- data/lib/wrxer/comment.rb +16 -0
- data/lib/wrxer/comment_collection.rb +7 -0
- data/lib/wrxer/document.rb +17 -0
- data/lib/wrxer/image.rb +8 -0
- data/lib/wrxer/parser.rb +20 -0
- data/lib/wrxer/post.rb +24 -0
- data/lib/wrxer/post_collection.rb +6 -0
- data/lib/wrxer/postmeta.rb +7 -0
- data/lib/wrxer/postmeta_collection.rb +6 -0
- data/lib/wrxer/uri_parser.rb +20 -0
- data/lib/wrxer/version.rb +3 -0
- data/lib/wrxer/wrxer_collection.rb +48 -0
- data/lib/wrxer/wrxer_object.rb +52 -0
- data/spec/fixtures/missing_fields.xml +155 -0
- data/spec/fixtures/wrx.xml +159 -0
- data/spec/spec_helper.rb +12 -0
- data/spec/wrxer/author_spec.rb +13 -0
- data/spec/wrxer/category_spec.rb +35 -0
- data/spec/wrxer/document_spec.rb +29 -0
- data/spec/wrxer/image_spec.rb +13 -0
- data/spec/wrxer/parser_spec.rb +14 -0
- data/spec/wrxer/post_collection_spec.rb +25 -0
- data/spec/wrxer/post_spec.rb +82 -0
- data/spec/wrxer/postmeta_collection_spec.rb +17 -0
- data/spec/wrxer/postmeta_spec.rb +18 -0
- data/spec/wrxer/uri_parser_spec.rb +14 -0
- data/spec/wrxer_spec.rb +14 -0
- data/wrxer.gemspec +26 -0
- metadata +168 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NzliM2Y1YTA1YzY4MTY1MDM5OWMwZGFhMjQxMzY2NjVjNDVlODVlOA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NWU3NDkzNDUxMGI2Njg5NzA5M2ViYTIxMWNlMjc4ZTg3ZDM5MTQ3Nw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ODVjYTNkYzA3YzdhNTQwMTFhOTlmZjFiMDc2NmM1MzFlZGVkODVmOTU2MjFk
|
10
|
+
MjJhOGQ4NTRiMjVmMTljNzJjMjlkMDBkNGNjMzdkNWFlNDViNDhhYjI0MmY4
|
11
|
+
YTg5MjAwNDAyYTBhMTRmZTVkNWNlYTM1ODlmMTUwOGJjOGYyZDc=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MjY4MTEyNzFmMTk2OTJkMTFmOGQ4N2M3N2QyZTgwNzM0ZTlhNDM0NGNjNGVl
|
14
|
+
NTRmMWUxNzc3ZTQzNmQ4MWE3NjdkZjhmYjE3MDE3Mjc3ZGY0MzRmYzYwZWU0
|
15
|
+
NWVlMWUzYTNmNDE1MTlkOWViZWEwYzExNWY1NGYzNGQ0YTM3OWY=
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 2.2.0
|
4
|
+
deploy:
|
5
|
+
provider: rubygems
|
6
|
+
on:
|
7
|
+
tags: true
|
8
|
+
api_key:
|
9
|
+
secure: HVIv1E7b3qrAKtX6wJBU7KXBKWvKZt3lcBt/rxbOA1CLjPfT3cXwgaBViD0h+RRUnlsAvsOBJqRTzkjdoAH9BBnF0UqAHaVc2q1OO/n3MWB78nxYkqINBEIMNeyjA3exGgRoCxpl9zN62ChAeNkULe7shAlBngP2S3HLwrVgIhE=
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Patrick Schmitz
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
# Wrxer
|
2
|
+
|
3
|
+
This gem is a simple parser for Wordpress eXtended RSS. At the moment it
|
4
|
+
supports many of the parameters related to posts; it doesn't currently support
|
5
|
+
page exports.
|
6
|
+
|
7
|
+
The document and its attributes can be easily converted to json or a ruby hash
|
8
|
+
by calling `#to_hash` or `#to_json`. Collections like PostCollection,
|
9
|
+
PostmetaCollection, and CommentCollection are returned as lazy enumerables
|
10
|
+
when converting the document to a hash or json, but can be enumerated to
|
11
|
+
retrieve the hash or json objects.
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
Add this line to your application's Gemfile:
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
gem 'wrxer'
|
19
|
+
```
|
20
|
+
|
21
|
+
And then execute:
|
22
|
+
|
23
|
+
$ bundle
|
24
|
+
|
25
|
+
Or install it yourself as:
|
26
|
+
|
27
|
+
$ gem install wrxer
|
28
|
+
|
29
|
+
## Usage
|
30
|
+
|
31
|
+
Wrxer can be used to parse posts from a Wordpress export. Take a look at
|
32
|
+
`lib/wrxer/post` to see which attributes are supported.
|
33
|
+
|
34
|
+
### Parse from File
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
document = Wrxer.parse("dir/to/export.xml")
|
38
|
+
=> #<Wrxer::Document:0x3fd8091d3488> Attributes: ...
|
39
|
+
```
|
40
|
+
|
41
|
+
### Parse from URI
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
document = Wrxer.parse_uri("https://raw.githubusercontent.com/bullfight/wrxer/master/spec/fixtures/wrx.xml")
|
45
|
+
=> #<Wrxer::Document:0x3fd8091d3488> Attributes: ...
|
46
|
+
```
|
47
|
+
|
48
|
+
### Reading a parsed document
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
document = Wrxer.parse("export.xml")
|
52
|
+
=> #<Wrxer::Document:0x3fd8091d3488> Attributes: {
|
53
|
+
"title": "Wrxer News",
|
54
|
+
"link": "https://wrxernews.wordpress.com",
|
55
|
+
"description": "The Most Reliable Source For Wrxer News Since 2007.",
|
56
|
+
"pub_date": "2015-03-24 21:18:58 +0000",
|
57
|
+
"language": "en",
|
58
|
+
"wrx_version": null,
|
59
|
+
"base_site_url": "http://wordpress.com/",
|
60
|
+
"base_blog_url": "https://wrxernews.wordpress.com",
|
61
|
+
"author": {
|
62
|
+
"login": "wrxernews",
|
63
|
+
"email": null,
|
64
|
+
"display_name": "Wrxer News",
|
65
|
+
"first_name": "",
|
66
|
+
"last_name": ""
|
67
|
+
},
|
68
|
+
"generator": "http://wordpress.com/",
|
69
|
+
"image": {
|
70
|
+
"url": "https://secure.gravatar.com/blavatar/foobar",
|
71
|
+
"title": " » Wrxer News",
|
72
|
+
"link": "https://wrxernews.wordpress.com"
|
73
|
+
},
|
74
|
+
"posts": {
|
75
|
+
"data": "#<Enumerator::Lazy:0x007fb01303ee20>"
|
76
|
+
}
|
77
|
+
}
|
78
|
+
|
79
|
+
post = document.posts.first
|
80
|
+
=> #<Wrxer::Post:0x3fd4799693ac> Attributes: {
|
81
|
+
"title": "Welcome To Wrxer News.",
|
82
|
+
"link": "https://wrxernews.wordpress.com/2007/11/17/welcome-to-wrxer-news/",
|
83
|
+
"pub_date": "2007-11-17 21:30:51 +0000",
|
84
|
+
"creator": "wrxernews",
|
85
|
+
"content": "Welcome to <strong>Wrxer News</strong> - The most up-to-date and reliable source for Wrxer news.",
|
86
|
+
"excerpt": "Excerpt Text",
|
87
|
+
"id": 3,
|
88
|
+
"published_at": "2007-11-17 21:30:51 -0800",
|
89
|
+
"comment_status": "open",
|
90
|
+
"ping_status": "open",
|
91
|
+
"name": "welcome-to-wrxer-news",
|
92
|
+
"status": "publish",
|
93
|
+
"parent": 0,
|
94
|
+
"menu_order": 0,
|
95
|
+
"type": "post",
|
96
|
+
"is_sticky": 0,
|
97
|
+
"category": {
|
98
|
+
"domain": "category",
|
99
|
+
"nicename": "wrxer-news",
|
100
|
+
"body": "Wrxer News"
|
101
|
+
},
|
102
|
+
"postmetas": {
|
103
|
+
"data": "#<Enumerator::Lazy:0x007fa8f315a970>"
|
104
|
+
},
|
105
|
+
"comments": {
|
106
|
+
"data": "#<Enumerator::Lazy:0x007fa8f315a3f8>"
|
107
|
+
}
|
108
|
+
}
|
109
|
+
post.title
|
110
|
+
=> "Welcome To Wrxer News."
|
111
|
+
|
112
|
+
```
|
113
|
+
|
114
|
+
## Contributing
|
115
|
+
|
116
|
+
1. Fork it ( https://github.com/[my-github-username]/wrxer/fork )
|
117
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
118
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
119
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
120
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/lib/wrxer.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require "wrxer/version"
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'time'
|
4
|
+
require 'json'
|
5
|
+
require 'open-uri'
|
6
|
+
|
7
|
+
|
8
|
+
require 'wrxer/coercion'
|
9
|
+
require 'wrxer/attribute'
|
10
|
+
require 'wrxer/wrxer_object'
|
11
|
+
require 'wrxer/wrxer_collection'
|
12
|
+
|
13
|
+
require 'wrxer/category'
|
14
|
+
require 'wrxer/postmeta'
|
15
|
+
require 'wrxer/postmeta_collection'
|
16
|
+
require 'wrxer/comment'
|
17
|
+
require 'wrxer/comment_collection'
|
18
|
+
|
19
|
+
require 'wrxer/post'
|
20
|
+
require 'wrxer/post_collection'
|
21
|
+
require 'wrxer/author'
|
22
|
+
require 'wrxer/image'
|
23
|
+
require 'wrxer/document'
|
24
|
+
|
25
|
+
require 'wrxer/parser'
|
26
|
+
require 'wrxer/uri_parser'
|
27
|
+
|
28
|
+
# Public entry points of the gem.
module Wrxer
  class << self
    # Hands +file+ to a new Parser and returns whatever Parser#call produces.
    def parse(file)
      parser = Parser.new(file)
      parser.call
    end

    # Hands +uri+ to a new URIParser and returns whatever its #call produces.
    def parse_uri(uri)
      uri_parser = URIParser.new(uri)
      uri_parser.call
    end
  end
end
|
data/lib/wrxer/author.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
module Wrxer
  # Author details read from the children of a <wp:author> element.
  class Author < WrxerObject
    xpath 'wp:author'
    attribute :login, "wp:author_login"
    # The element is named wp:author_email, like its wp:author_* siblings.
    # The previous path was misspelled "wp:autor_email" and could never match,
    # so the email always came back nil.
    attribute :email, "wp:author_email"
    attribute :display_name, "wp:author_display_name"
    attribute :first_name, "wp:author_first_name"
    attribute :last_name, "wp:author_last_name"
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module Wrxer
  # Base class for turning an XML node into a Ruby value.
  #
  # A subclass decides *where* to look (a root set with +xpath+, or the
  # +:xpath+ entry of +params+) and *what* to return (by overriding +coerce+).
  class Coercion
    # Stores the root xpath for this class. The value lives in a class-level
    # instance variable, so each subclass keeps its own and none is inherited.
    def self.xpath(value)
      @xpath = value
    end

    # Locates the node to coerce and returns its coerced value.
    #
    # document - an object responding to +name+ and +at_xpath+.
    # params   - optional Hash; +:xpath+ is used when the class has no root.
    #
    # When +document+ is itself named like the root it is used directly;
    # otherwise the root is looked up beneath it. Returns nil when nothing
    # is found.
    def self.call(document, params = {})
      root = @xpath || params[:xpath]
      node = document.name == root ? document : document.at_xpath(root)
      coerce(node) unless node.nil?
    end

    # Default coercion: wrap the node in a new instance of this class.
    def self.coerce(document)
      new(document)
    end
  end

  # Returns the node's text unchanged.
  class TextAttribute < Coercion
    def self.coerce(document)
      document.text
    end
  end

  # Returns the node's text as an Integer.
  #
  # Raises ArgumentError when the text is not a valid decimal integer.
  class IntegerAttribute < Coercion
    def self.coerce(document)
      # Base 10 is explicit: without it Kernel#Integer honours radix prefixes,
      # so a zero-padded "010" would be read as octal 8 and "08" would raise.
      Integer(document.text, 10)
    end
  end

  # Returns the node's text parsed with Time.parse.
  class TimeAttribute < Coercion
    def self.coerce(document)
      # NOTE(review): Time.parse assumes the local zone when the text carries
      # no offset -- confirm that is intended for the *_gmt fields.
      Time.parse(document.text)
    end
  end

  # Reads an XML attribute (not a child element) of the given node.
  class ElementAttribute < Coercion
    # Looks up the attribute named by +params[:xpath]+ in
    # +document.attributes+; returns its value, or nil when absent.
    def self.call(document, params = {})
      element = document.attributes[params[:xpath].to_s]
      coerce(element) unless element.nil?
    end

    def self.coerce(document)
      document.value
    end
  end

  # Reads the text of the given node's children.
  class ChildAttribute < Coercion
    # Returns the children's text, or nil when there are no children.
    # +params+ is accepted for signature compatibility and is not used.
    def self.call(document, params = {})
      children = document.children
      coerce(children) unless children.empty?
    end

    def self.coerce(document)
      document.text
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Wrxer
  # A single comment on a post, read from the children of a comment element.
  class Comment < WrxerObject
    xpath 'comment'
    # Previously declared as `attribute :id, IntegerAttribute`, which put the
    # coercion class in the xpath position and named no element to read.
    attribute :id, "wp:comment_id", IntegerAttribute
    attribute :author, "wp:comment_author"
    # WXR prefixes these three with "comment_"; the bare wp:author_* paths
    # used before do not exist inside a comment. Note the upper-case "IP".
    attribute :email, "wp:comment_author_email"
    attribute :url, "wp:comment_author_url"
    attribute :ip, "wp:comment_author_IP"
    attribute :published_at, "wp:comment_date_gmt", TimeAttribute
    attribute :content, "wp:comment_content"
    attribute :approved, "wp:comment_approved", IntegerAttribute
    attribute :type, "wp:comment_type"
    attribute :parent, "wp:comment_parent", IntegerAttribute
    attribute :user_id, "wp:comment_user_id", IntegerAttribute
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Wrxer
  # The export as a whole, rooted at the RSS <channel> element.
  class Document < WrxerObject
    xpath '//channel'
    attribute :title
    attribute :link
    attribute :description
    attribute :pub_date, "pubDate", TimeAttribute
    attribute :language
    # The attribute keeps its published name (:wrx_version) so callers are
    # unaffected, but the element WordPress writes is wp:wxr_version. The
    # transposed "wp:wrx_version" path never matched and always yielded nil.
    attribute :wrx_version, "wp:wxr_version"
    attribute :base_site_url, "wp:base_site_url"
    attribute :base_blog_url, "wp:base_blog_url"
    attribute :author, "wp:author", Author
    attribute :generator
    attribute :image, "image", Image
    attribute :posts, "item", PostCollection
  end
end
|
data/lib/wrxer/image.rb
ADDED
data/lib/wrxer/parser.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Wrxer
  # Loads an XML file from disk and exposes it as a Document.
  class Parser
    attr_reader :filename, :xml_document, :document

    # Opens +filename+ and parses its contents with Nokogiri. The file handle
    # is closed as soon as parsing finishes.
    #
    # filename - anything File.open accepts (a String path, a Pathname, ...).
    def initialize(filename)
      @filename = filename
      @xml_document = File.open(filename) { |file| Nokogiri::XML(file) }
    end

    # Builds the Document from the parsed XML on first use and returns the
    # same object on later calls.
    def call
      @document ||= Document.call(@xml_document)
    end

    # Short description that shows the file name and the XML document's class
    # rather than dumping the whole parsed tree.
    def inspect
      # A plain String path has no #to_path, so calling it unconditionally
      # raised NoMethodError; fall back to the value itself in that case.
      path = @filename.respond_to?(:to_path) ? @filename.to_path : @filename
      "#<#{self.class}:0x#{object_id.to_s(16)}> { filename: #{path}, xml_document: #{@xml_document.class} }"
    end
  end
end
|
data/lib/wrxer/post.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Wrxer
  # One <item> of the export, with its WordPress-specific fields.
  class Post < WrxerObject
    xpath("item")

    # Standard RSS / Dublin Core fields.
    attribute(:title)
    attribute(:link)
    attribute(:pub_date, "pubDate", TimeAttribute)
    attribute(:creator, "dc:creator")
    attribute(:content, "content:encoded")
    attribute(:excerpt, "excerpt:encoded")

    # WordPress fields (wp: namespace).
    attribute(:id, "wp:post_id", IntegerAttribute)
    attribute(:published_at, "wp:post_date_gmt", TimeAttribute)
    attribute(:comment_status, "wp:comment_status")
    attribute(:ping_status, "wp:ping_status")
    attribute(:name, "wp:post_name")
    attribute(:status, "wp:status")
    attribute(:parent, "wp:post_parent", IntegerAttribute)
    attribute(:menu_order, "wp:menu_order", IntegerAttribute)
    attribute(:type, "wp:post_type")
    attribute(:is_sticky, "wp:is_sticky", IntegerAttribute)

    # Nested objects and collections.
    attribute(:category, "category", Category)
    attribute(:postmetas, "item", PostmetaCollection)
    attribute(:comments, "item", CommentCollection)
  end
end
|