wrxer 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/.travis.yml +9 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +120 -0
- data/Rakefile +7 -0
- data/lib/wrxer.rb +36 -0
- data/lib/wrxer/attribute.rb +7 -0
- data/lib/wrxer/author.rb +10 -0
- data/lib/wrxer/category.rb +8 -0
- data/lib/wrxer/coercion.rb +60 -0
- data/lib/wrxer/comment.rb +16 -0
- data/lib/wrxer/comment_collection.rb +7 -0
- data/lib/wrxer/document.rb +17 -0
- data/lib/wrxer/image.rb +8 -0
- data/lib/wrxer/parser.rb +20 -0
- data/lib/wrxer/post.rb +24 -0
- data/lib/wrxer/post_collection.rb +6 -0
- data/lib/wrxer/postmeta.rb +7 -0
- data/lib/wrxer/postmeta_collection.rb +6 -0
- data/lib/wrxer/uri_parser.rb +20 -0
- data/lib/wrxer/version.rb +3 -0
- data/lib/wrxer/wrxer_collection.rb +48 -0
- data/lib/wrxer/wrxer_object.rb +52 -0
- data/spec/fixtures/missing_fields.xml +155 -0
- data/spec/fixtures/wrx.xml +159 -0
- data/spec/spec_helper.rb +12 -0
- data/spec/wrxer/author_spec.rb +13 -0
- data/spec/wrxer/category_spec.rb +35 -0
- data/spec/wrxer/document_spec.rb +29 -0
- data/spec/wrxer/image_spec.rb +13 -0
- data/spec/wrxer/parser_spec.rb +14 -0
- data/spec/wrxer/post_collection_spec.rb +25 -0
- data/spec/wrxer/post_spec.rb +82 -0
- data/spec/wrxer/postmeta_collection_spec.rb +17 -0
- data/spec/wrxer/postmeta_spec.rb +18 -0
- data/spec/wrxer/uri_parser_spec.rb +14 -0
- data/spec/wrxer_spec.rb +14 -0
- data/wrxer.gemspec +26 -0
- metadata +168 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NzliM2Y1YTA1YzY4MTY1MDM5OWMwZGFhMjQxMzY2NjVjNDVlODVlOA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NWU3NDkzNDUxMGI2Njg5NzA5M2ViYTIxMWNlMjc4ZTg3ZDM5MTQ3Nw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ODVjYTNkYzA3YzdhNTQwMTFhOTlmZjFiMDc2NmM1MzFlZGVkODVmOTU2MjFk
|
10
|
+
MjJhOGQ4NTRiMjVmMTljNzJjMjlkMDBkNGNjMzdkNWFlNDViNDhhYjI0MmY4
|
11
|
+
YTg5MjAwNDAyYTBhMTRmZTVkNWNlYTM1ODlmMTUwOGJjOGYyZDc=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MjY4MTEyNzFmMTk2OTJkMTFmOGQ4N2M3N2QyZTgwNzM0ZTlhNDM0NGNjNGVl
|
14
|
+
NTRmMWUxNzc3ZTQzNmQ4MWE3NjdkZjhmYjE3MDE3Mjc3ZGY0MzRmYzYwZWU0
|
15
|
+
NWVlMWUzYTNmNDE1MTlkOWViZWEwYzExNWY1NGYzNGQ0YTM3OWY=
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 2.2.0
|
4
|
+
deploy:
|
5
|
+
provider: rubygems
|
6
|
+
on:
|
7
|
+
tags: true
|
8
|
+
api_key:
|
9
|
+
secure: HVIv1E7b3qrAKtX6wJBU7KXBKWvKZt3lcBt/rxbOA1CLjPfT3cXwgaBViD0h+RRUnlsAvsOBJqRTzkjdoAH9BBnF0UqAHaVc2q1OO/n3MWB78nxYkqINBEIMNeyjA3exGgRoCxpl9zN62ChAeNkULe7shAlBngP2S3HLwrVgIhE=
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Patrick Schmitz
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
# Wrxer
|
2
|
+
|
3
|
+
This gem is a simple parser for Wordpress eXtended RSS. At the moment it
|
4
|
+
supports many of the parameters related to posts; it doesn't currently support
|
5
|
+
page exports.
|
6
|
+
|
7
|
+
The document and its attributes can be easily converted to json or a ruby hash
|
8
|
+
by calling `#to_hash` or `#to_json`. Collections like PostCollection,
|
9
|
+
PostmetaCollection, and CommentCollection are returned as lazy enumerables
|
10
|
+
when converting the document to a hash or json, but can be enumerated to
|
11
|
+
retrieve the hash or json objects.
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
Add this line to your application's Gemfile:
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
gem 'wrxer'
|
19
|
+
```
|
20
|
+
|
21
|
+
And then execute:
|
22
|
+
|
23
|
+
$ bundle
|
24
|
+
|
25
|
+
Or install it yourself as:
|
26
|
+
|
27
|
+
$ gem install wrxer
|
28
|
+
|
29
|
+
## Usage
|
30
|
+
|
31
|
+
Wrxer can be used to parse posts from a WordPress export. Take a look at
|
32
|
+
`lib/wrxer/post` to see which attributes are supported.
|
33
|
+
|
34
|
+
### Parse from File
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
document = Wrxer.parse("dir/to/export.xml")
|
38
|
+
=> #<Wrxer::Document:0x3fd8091d3488> Attributes: ...
|
39
|
+
```
|
40
|
+
|
41
|
+
### Parse from URI
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
document = Wrxer.parse_uri("https://raw.githubusercontent.com/bullfight/wrxer/master/spec/fixtures/wrx.xml")
|
45
|
+
=> #<Wrxer::Document:0x3fd8091d3488> Attributes: ...
|
46
|
+
```
|
47
|
+
|
48
|
+
### Reading a parsed document
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
document = Wrxer.parse("export.xml")
|
52
|
+
=> #<Wrxer::Document:0x3fd8091d3488> Attributes: {
|
53
|
+
"title": "Wrxer News",
|
54
|
+
"link": "https://wrxernews.wordpress.com",
|
55
|
+
"description": "The Most Reliable Source For Wrxer News Since 2007.",
|
56
|
+
"pub_date": "2015-03-24 21:18:58 +0000",
|
57
|
+
"language": "en",
|
58
|
+
"wrx_version": null,
|
59
|
+
"base_site_url": "http://wordpress.com/",
|
60
|
+
"base_blog_url": "https://wrxernews.wordpress.com",
|
61
|
+
"author": {
|
62
|
+
"login": "wrxernews",
|
63
|
+
"email": null,
|
64
|
+
"display_name": "Wrxer News",
|
65
|
+
"first_name": "",
|
66
|
+
"last_name": ""
|
67
|
+
},
|
68
|
+
"generator": "http://wordpress.com/",
|
69
|
+
"image": {
|
70
|
+
"url": "https://secure.gravatar.com/blavatar/foobar",
|
71
|
+
"title": " » Wrxer News",
|
72
|
+
"link": "https://wrxernews.wordpress.com"
|
73
|
+
},
|
74
|
+
"posts": {
|
75
|
+
"data": "#<Enumerator::Lazy:0x007fb01303ee20>"
|
76
|
+
}
|
77
|
+
}
|
78
|
+
|
79
|
+
post = document.posts.first
|
80
|
+
=> #<Wrxer::Post:0x3fd4799693ac> Attributes: {
|
81
|
+
"title": "Welcome To Wrxer News.",
|
82
|
+
"link": "https://wrxernews.wordpress.com/2007/11/17/welcome-to-wrxer-news/",
|
83
|
+
"pub_date": "2007-11-17 21:30:51 +0000",
|
84
|
+
"creator": "wrxernews",
|
85
|
+
"content": "Welcome to <strong>Wrxer News</strong> - The most up-to-date and reliable source for Wrxer news.",
|
86
|
+
"excerpt": "Excerpt Text",
|
87
|
+
"id": 3,
|
88
|
+
"published_at": "2007-11-17 21:30:51 -0800",
|
89
|
+
"comment_status": "open",
|
90
|
+
"ping_status": "open",
|
91
|
+
"name": "welcome-to-wrxer-news",
|
92
|
+
"status": "publish",
|
93
|
+
"parent": 0,
|
94
|
+
"menu_order": 0,
|
95
|
+
"type": "post",
|
96
|
+
"is_sticky": 0,
|
97
|
+
"category": {
|
98
|
+
"domain": "category",
|
99
|
+
"nicename": "wrxer-news",
|
100
|
+
"body": "Wrxer News"
|
101
|
+
},
|
102
|
+
"postmetas": {
|
103
|
+
"data": "#<Enumerator::Lazy:0x007fa8f315a970>"
|
104
|
+
},
|
105
|
+
"comments": {
|
106
|
+
"data": "#<Enumerator::Lazy:0x007fa8f315a3f8>"
|
107
|
+
}
|
108
|
+
}
|
109
|
+
post.title
|
110
|
+
=> "Welcome To Wrxer News."
|
111
|
+
|
112
|
+
```
|
113
|
+
|
114
|
+
## Contributing
|
115
|
+
|
116
|
+
1. Fork it ( https://github.com/bullfight/wrxer/fork )
|
117
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
118
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
119
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
120
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/lib/wrxer.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require "wrxer/version"
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'time'
|
4
|
+
require 'json'
|
5
|
+
require 'open-uri'
|
6
|
+
|
7
|
+
|
8
|
+
require 'wrxer/coercion'
|
9
|
+
require 'wrxer/attribute'
|
10
|
+
require 'wrxer/wrxer_object'
|
11
|
+
require 'wrxer/wrxer_collection'
|
12
|
+
|
13
|
+
require 'wrxer/category'
|
14
|
+
require 'wrxer/postmeta'
|
15
|
+
require 'wrxer/postmeta_collection'
|
16
|
+
require 'wrxer/comment'
|
17
|
+
require 'wrxer/comment_collection'
|
18
|
+
|
19
|
+
require 'wrxer/post'
|
20
|
+
require 'wrxer/post_collection'
|
21
|
+
require 'wrxer/author'
|
22
|
+
require 'wrxer/image'
|
23
|
+
require 'wrxer/document'
|
24
|
+
|
25
|
+
require 'wrxer/parser'
|
26
|
+
require 'wrxer/uri_parser'
|
27
|
+
|
28
|
+
# Top-level entry points of the gem.
module Wrxer
  class << self
    # Builds a Parser for +file+ and returns the result of calling it.
    #
    # @param file the export to read; handed to Parser.new unchanged
    # @return whatever Parser#call returns
    def parse(file)
      parser = Parser.new(file)
      parser.call
    end

    # Builds a URIParser for +uri+ and returns the result of calling it.
    #
    # @param uri the location of the export; handed to URIParser.new unchanged
    # @return whatever URIParser#call returns
    def parse_uri(uri)
      parser = URIParser.new(uri)
      parser.call
    end
  end
end
|
data/lib/wrxer/author.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
module Wrxer
  # An author entry of the export, rooted at the <wp:author> element.
  #
  # Each +attribute+ line pairs a reader name with the child element
  # it is read from.
  class Author < WrxerObject
    xpath 'wp:author'
    attribute :login, "wp:author_login"
    # The element is spelled wp:author_email; the former "wp:autor_email"
    # does not name any element of the author record.
    attribute :email, "wp:author_email"
    attribute :display_name, "wp:author_display_name"
    attribute :first_name, "wp:author_first_name"
    attribute :last_name, "wp:author_last_name"
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module Wrxer
  # Base class for objects that turn an XML node into a Ruby value.
  #
  # Subclasses override +coerce+ (and sometimes +call+) to decide how the
  # located node is converted.
  class Coercion
    # Stores the xpath this class is rooted at (class-level setting).
    def self.xpath(value)
      @xpath = value
    end

    # Locates the node to convert and coerces it.
    #
    # The lookup path is the class-level xpath when one was set, otherwise
    # params[:xpath]. When +document+ itself is not named like that path the
    # node is searched for with +at_xpath+.
    #
    # @param document a node responding to +name+ and +at_xpath+
    # @param params [Hash] may carry :xpath
    # @return the coerced value, or nil when no node was found
    def self.call(document, params = {})
      root = @xpath || params[:xpath]
      unless document.name == root
        document = document.at_xpath(root)
      end

      document.nil? ? nil : self.coerce(document)
    end

    # Default coercion: wrap the node in an instance of this class.
    def self.coerce(document)
      self.new(document)
    end
  end

  # Returns the node's text unchanged.
  class TextAttribute < Coercion
    def self.coerce(document)
      document.text
    end
  end

  # Converts the node's text to an Integer.
  class IntegerAttribute < Coercion
    # @return [Integer, nil] nil when the element is empty
    # @raise [ArgumentError] when the text is not a decimal integer
    def self.coerce(document)
      text = document.text.to_s.strip
      return nil if text.empty?

      # Force base 10: without it "08" raises and "010" is read as octal 8.
      Integer(text, 10)
    end
  end

  # Converts the node's text to a Time.
  class TimeAttribute < Coercion
    # @return [Time, nil] nil when the element is empty
    # @raise [ArgumentError] when the text cannot be parsed as a time
    def self.coerce(document)
      text = document.text.to_s.strip
      return nil if text.empty?

      # Timestamps without a zone designator (the *_gmt elements) would be
      # read in the host's local zone by Time.parse; treat them as UTC.
      zoned = Date._parse(text).key?(:zone)
      Time.parse(zoned ? text : "#{text} UTC")
    end
  end

  # Reads an XML attribute of the node, looked up by params[:xpath].
  class ElementAttribute < Coercion
    # @return the attribute's value, or nil when the attribute is absent
    def self.call(document, params = {})
      element = document.attributes[params[:xpath].to_s]
      element.nil? ? nil : self.coerce(element)
    end

    def self.coerce(document)
      document.value
    end
  end

  # Reads the text of the node's children.
  class ChildAttribute < Coercion
    # @return the children's text, or nil when the node has no children
    def self.call(document, params = {})
      child = document.children
      child.empty? ? nil : self.coerce(child)
    end

    def self.coerce(document)
      document.text
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Wrxer
  # A single comment attached to a post.
  #
  # Each +attribute+ line pairs a reader name with the WXR child element it
  # is read from and, optionally, the coercion applied to its text.
  class Comment < WrxerObject
    xpath 'comment'
    # Fixed: the element name was missing, so the coercion class was being
    # passed in the position every other declaration uses for the xpath.
    attribute :id, "wp:comment_id", IntegerAttribute
    attribute :author, "wp:comment_author"
    # Fixed: WXR names the commenter fields wp:comment_author_*, not wp:author_*.
    attribute :email, "wp:comment_author_email"
    attribute :url, "wp:comment_author_url"
    attribute :ip, "wp:comment_author_IP"
    attribute :published_at, "wp:comment_date_gmt", TimeAttribute
    attribute :content, "wp:comment_content"
    # NOTE(review): WordPress can export non-numeric approval states
    # (e.g. "spam"); confirm IntegerAttribute is acceptable here.
    attribute :approved, "wp:comment_approved", IntegerAttribute
    attribute :type, "wp:comment_type"
    attribute :parent, "wp:comment_parent", IntegerAttribute
    attribute :user_id, "wp:comment_user_id", IntegerAttribute
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Wrxer
  # The export as a whole, rooted at the RSS <channel> element.
  #
  # Each +attribute+ line names a reader; where given, the second argument is
  # the element it is read from and the third the class used to convert it.
  class Document < WrxerObject
    xpath '//channel'
    attribute :title
    attribute :link
    attribute :description
    attribute :pub_date, "pubDate", TimeAttribute
    attribute :language
    # Fixed: the WXR element is wp:wxr_version; the transposed
    # "wp:wrx_version" named no element. The reader keeps its public name.
    attribute :wrx_version, "wp:wxr_version"
    attribute :base_site_url, "wp:base_site_url"
    attribute :base_blog_url, "wp:base_blog_url"
    attribute :author, "wp:author", Author
    attribute :generator
    attribute :image, "image", Image
    attribute :posts, "item", PostCollection
  end
end
|
data/lib/wrxer/image.rb
ADDED
data/lib/wrxer/parser.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Wrxer
  # Reads an export from a local file and exposes it as a Document.
  class Parser
    attr_reader :filename, :xml_document, :document

    # Opens +filename+ and parses its content into a Nokogiri XML document.
    # The file handle is closed when the block returns.
    #
    # @param filename [String, #to_path] location of the export on disk
    def initialize(filename)
      @filename = filename

      File.open(filename) do |file|
        @xml_document = Nokogiri::XML(file)
      end
    end

    # Builds the Document from the parsed XML; the result is memoized.
    def call
      @document ||= Document.call(@xml_document)
    end

    # Short description showing the source path and the parsed document's class.
    #
    # File.path accepts both plain Strings and path-like objects; calling
    # #to_path directly raised NoMethodError for String filenames.
    def inspect
      "#<#{self.class}:0x#{self.object_id.to_s(16)}> { filename: #{File.path(@filename)}, xml_document: #{@xml_document.class} }"
    end
  end
end
|
data/lib/wrxer/post.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Wrxer
  # One <item> of the export.
  #
  # Every declaration below names a reader; where given, the second argument
  # is the element it is read from and the third the class used to convert it.
  class Post < WrxerObject
    xpath "item"

    # Standard RSS / Dublin Core fields
    attribute :title
    attribute :link
    attribute :pub_date, "pubDate", TimeAttribute
    attribute :creator, "dc:creator"
    attribute :content, "content:encoded"
    attribute :excerpt, "excerpt:encoded"

    # WordPress-specific fields
    attribute :id, "wp:post_id", IntegerAttribute
    attribute :published_at, "wp:post_date_gmt", TimeAttribute
    attribute :comment_status, "wp:comment_status"
    attribute :ping_status, "wp:ping_status"
    attribute :name, "wp:post_name"
    attribute :status, "wp:status"
    attribute :parent, "wp:post_parent", IntegerAttribute
    attribute :menu_order, "wp:menu_order", IntegerAttribute
    attribute :type, "wp:post_type"
    attribute :is_sticky, "wp:is_sticky", IntegerAttribute

    # Nested objects and collections
    attribute :category, "category", Category
    attribute :postmetas, "item", PostmetaCollection
    attribute :comments, "item", CommentCollection
  end
end
|