wrxer 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NzliM2Y1YTA1YzY4MTY1MDM5OWMwZGFhMjQxMzY2NjVjNDVlODVlOA==
5
+ data.tar.gz: !binary |-
6
+ NWU3NDkzNDUxMGI2Njg5NzA5M2ViYTIxMWNlMjc4ZTg3ZDM5MTQ3Nw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ODVjYTNkYzA3YzdhNTQwMTFhOTlmZjFiMDc2NmM1MzFlZGVkODVmOTU2MjFk
10
+ MjJhOGQ4NTRiMjVmMTljNzJjMjlkMDBkNGNjMzdkNWFlNDViNDhhYjI0MmY4
11
+ YTg5MjAwNDAyYTBhMTRmZTVkNWNlYTM1ODlmMTUwOGJjOGYyZDc=
12
+ data.tar.gz: !binary |-
13
+ MjY4MTEyNzFmMTk2OTJkMTFmOGQ4N2M3N2QyZTgwNzM0ZTlhNDM0NGNjNGVl
14
+ NTRmMWUxNzc3ZTQzNmQ4MWE3NjdkZjhmYjE3MDE3Mjc3ZGY0MzRmYzYwZWU0
15
+ NWVlMWUzYTNmNDE1MTlkOWViZWEwYzExNWY1NGYzNGQ0YTM3OWY=
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.0
4
+ deploy:
5
+ provider: rubygems
6
+ on:
7
+ tags: true
8
+ api_key:
9
+ secure: HVIv1E7b3qrAKtX6wJBU7KXBKWvKZt3lcBt/rxbOA1CLjPfT3cXwgaBViD0h+RRUnlsAvsOBJqRTzkjdoAH9BBnF0UqAHaVc2q1OO/n3MWB78nxYkqINBEIMNeyjA3exGgRoCxpl9zN62ChAeNkULe7shAlBngP2S3HLwrVgIhE=
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in wrxer.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Patrick Schmitz
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,120 @@
1
+ # Wrxer
2
+
3
+ This gem is a simple parser for WordPress eXtended RSS. At the moment it
4
+ supports many of the parameters related to posts; it doesn't currently support
5
+ page exports.
6
+
7
+ The document and its attributes can be easily converted to json or a ruby hash
8
+ by calling `#to_hash` or `#to_json`. Collections like PostCollection,
9
+ PostmetaCollection, and CommentCollection are returned as lazy enumerables
10
+ when converting the document to a hash or json, but can be enumerated to
11
+ retrieve the hash or json objects.
12
+
13
+ ## Installation
14
+
15
+ Add this line to your application's Gemfile:
16
+
17
+ ```ruby
18
+ gem 'wrxer'
19
+ ```
20
+
21
+ And then execute:
22
+
23
+ $ bundle
24
+
25
+ Or install it yourself as:
26
+
27
+ $ gem install wrxer
28
+
29
+ ## Usage
30
+
31
+ Wrxer can be used to parse posts from a WordPress export. Take a look at
32
+ `lib/wrxer/post` to see which attributes are supported.
33
+
34
+ ### Parse from File
35
+
36
+ ```ruby
37
+ document = Wrxer.parse("dir/to/export.xml")
38
+ => #<Wrxer::Document:0x3fd8091d3488> Attributes: ...
39
+ ```
40
+
41
+ ### Parse from URI
42
+
43
+ ```ruby
44
+ document = Wrxer.parse_uri("https://raw.githubusercontent.com/bullfight/wrxer/master/spec/fixtures/wrx.xml")
45
+ => #<Wrxer::Document:0x3fd8091d3488> Attributes: ...
46
+ ```
47
+
48
+ ### Reading a parsed document
49
+
50
+ ```ruby
51
+ document = Wrxer.parse("export.xml")
52
+ => #<Wrxer::Document:0x3fd8091d3488> Attributes: {
53
+ "title": "Wrxer News",
54
+ "link": "https://wrxernews.wordpress.com",
55
+ "description": "The Most Reliable Source For Wrxer News Since 2007.",
56
+ "pub_date": "2015-03-24 21:18:58 +0000",
57
+ "language": "en",
58
+ "wrx_version": null,
59
+ "base_site_url": "http://wordpress.com/",
60
+ "base_blog_url": "https://wrxernews.wordpress.com",
61
+ "author": {
62
+ "login": "wrxernews",
63
+ "email": null,
64
+ "display_name": "Wrxer News",
65
+ "first_name": "",
66
+ "last_name": ""
67
+ },
68
+ "generator": "http://wordpress.com/",
69
+ "image": {
70
+ "url": "https://secure.gravatar.com/blavatar/foobar",
71
+ "title": " » Wrxer News",
72
+ "link": "https://wrxernews.wordpress.com"
73
+ },
74
+ "posts": {
75
+ "data": "#<Enumerator::Lazy:0x007fb01303ee20>"
76
+ }
77
+ }
78
+
79
+ post = document.posts.first
80
+ => #<Wrxer::Post:0x3fd4799693ac> Attributes: {
81
+ "title": "Welcome To Wrxer News.",
82
+ "link": "https://wrxernews.wordpress.com/2007/11/17/welcome-to-wrxer-news/",
83
+ "pub_date": "2007-11-17 21:30:51 +0000",
84
+ "creator": "wrxernews",
85
+ "content": "Welcome to <strong>Wrxer News</strong> - The most up-to-date and reliable source for Wrxer news.",
86
+ "excerpt": "Excerpt Text",
87
+ "id": 3,
88
+ "published_at": "2007-11-17 21:30:51 -0800",
89
+ "comment_status": "open",
90
+ "ping_status": "open",
91
+ "name": "welcome-to-wrxer-news",
92
+ "status": "publish",
93
+ "parent": 0,
94
+ "menu_order": 0,
95
+ "type": "post",
96
+ "is_sticky": 0,
97
+ "category": {
98
+ "domain": "category",
99
+ "nicename": "wrxer-news",
100
+ "body": "Wrxer News"
101
+ },
102
+ "postmetas": {
103
+ "data": "#<Enumerator::Lazy:0x007fa8f315a970>"
104
+ },
105
+ "comments": {
106
+ "data": "#<Enumerator::Lazy:0x007fa8f315a3f8>"
107
+ }
108
+ }
109
+ post.title
110
+ => "Welcome To Wrxer News."
111
+
112
+ ```
113
+
114
+ ## Contributing
115
+
116
+ 1. Fork it ( https://github.com/[my-github-username]/wrxer/fork )
117
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
118
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
119
+ 4. Push to the branch (`git push origin my-new-feature`)
120
+ 5. Create a new Pull Request
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Registers the `spec` task backed by RSpec's rake integration.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` invocation runs the spec task.
task default: :spec
7
+
@@ -0,0 +1,36 @@
1
+ require "wrxer/version"
2
+ require 'nokogiri'
3
+ require 'time'
4
+ require 'json'
5
+ require 'open-uri'
6
+
7
+
8
+ require 'wrxer/coercion'
9
+ require 'wrxer/attribute'
10
+ require 'wrxer/wrxer_object'
11
+ require 'wrxer/wrxer_collection'
12
+
13
+ require 'wrxer/category'
14
+ require 'wrxer/postmeta'
15
+ require 'wrxer/postmeta_collection'
16
+ require 'wrxer/comment'
17
+ require 'wrxer/comment_collection'
18
+
19
+ require 'wrxer/post'
20
+ require 'wrxer/post_collection'
21
+ require 'wrxer/author'
22
+ require 'wrxer/image'
23
+ require 'wrxer/document'
24
+
25
+ require 'wrxer/parser'
26
+ require 'wrxer/uri_parser'
27
+
28
# Top-level entry points of the library.
module Wrxer
  class << self
    # Builds a Parser for +file+ and returns the result of running it.
    #
    # @param file the value handed to Parser.new
    # @return whatever Parser#call returns
    def parse(file)
      parser = Parser.new(file)
      parser.call
    end

    # Builds a URIParser for +uri+ and returns the result of running it.
    #
    # @param uri the value handed to URIParser.new
    # @return whatever URIParser#call returns
    def parse_uri(uri)
      uri_parser = URIParser.new(uri)
      uri_parser.call
    end
  end
end
@@ -0,0 +1,7 @@
1
module Wrxer
  # Bundles an attribute's name, the xpath used to find it, and the coercion
  # object that converts the located node into a value.
  class Attribute < Struct.new(:name, :xpath, :coercion)
    # Hands the document to the coercion together with this attribute's xpath.
    #
    # @param document the node to read from; passed through unchanged
    # @param params accepted but not consulted by this method
    # @return whatever the coercion's +call+ returns
    def call(document, params = {})
      lookup = { xpath: xpath }
      coercion.call(document, lookup)
    end
  end
end
@@ -0,0 +1,10 @@
1
module Wrxer
  # Maps the <wp:author> element of an export onto named attributes.
  class Author < WrxerObject
    xpath 'wp:author'
    attribute :login, "wp:author_login"
    # Previously "wp:autor_email" (misspelled), which cannot match the
    # <wp:author_email> element and therefore always produced nil.
    attribute :email, "wp:author_email"
    attribute :display_name, "wp:author_display_name"
    attribute :first_name, "wp:author_first_name"
    attribute :last_name, "wp:author_last_name"
  end
end
@@ -0,0 +1,8 @@
1
module Wrxer
  # Describes a <category> element: two values taken from its XML attributes
  # and one taken from its child nodes.
  class Category < WrxerObject
    xpath "category"

    # Values read through ElementAttribute.
    attribute :domain, "domain", ElementAttribute
    attribute :nicename, "nicename", ElementAttribute

    # No xpath is supplied for the body; it is read through ChildAttribute.
    attribute :body, nil, ChildAttribute
  end
end
@@ -0,0 +1,60 @@
1
module Wrxer
  # Base class for turning an XML node into a Ruby value.
  #
  # Subclasses override +coerce+ to change how a located node is converted,
  # or +call+ to change how the node is located in the first place.
  class Coercion
    # Declares a fixed xpath for the receiving class. The value is stored on
    # that class only; classes that never call this fall back to
    # params[:xpath] in +call+.
    def self.xpath(value)
      @xpath = value
    end

    # Locates the target node and coerces it.
    #
    # @param document a node responding to +name+ and +at_xpath+
    # @param params [Hash] may carry :xpath, used when the class declared none
    # @return the coerced value, or nil when no node was found
    def self.call(document, params = {})
      root = @xpath || params[:xpath]
      # When the node handed in is already named like the target, use it
      # directly instead of searching beneath it.
      document = document.at_xpath(root) unless document.name == root

      document.nil? ? nil : coerce(document)
    end

    # Default conversion: wrap the node in an instance of the receiving class.
    def self.coerce(document)
      new(document)
    end
  end

  # Returns the node's text unchanged.
  class TextAttribute < Coercion
    def self.coerce(document)
      document.text
    end
  end

  # Converts the node's text to an Integer.
  class IntegerAttribute < Coercion
    # Parses strictly as base 10. Without an explicit base, Integer() infers
    # the radix from the prefix, so "08" raises ArgumentError and "010" is
    # read as octal 8.
    #
    # @raise [ArgumentError] when the text is not a decimal integer
    def self.coerce(document)
      Integer(document.text, 10)
    end
  end

  # Converts the node's text to a Time via Time.parse.
  class TimeAttribute < Coercion
    def self.coerce(document)
      Time.parse(document.text)
    end
  end

  # Reads one of the node's own XML attributes instead of a child element.
  class ElementAttribute < Coercion
    # @param document a node responding to +attributes+ (a Hash-like keyed by String)
    # @param params [Hash] :xpath holds the attribute name to look up
    # @return the attribute's value, or nil when the attribute is absent
    def self.call(document, params = {})
      element = document.attributes[params[:xpath].to_s]
      element.nil? ? nil : coerce(element)
    end

    def self.coerce(document)
      document.value
    end
  end

  # Reads the text of the node's children; params are not consulted.
  class ChildAttribute < Coercion
    # @return the children's text, or nil when there are no children
    def self.call(document, params = {})
      child = document.children
      child.empty? ? nil : coerce(child)
    end

    def self.coerce(document)
      document.text
    end
  end
end
@@ -0,0 +1,16 @@
1
module Wrxer
  # Maps a <wp:comment> element of an export onto named attributes.
  class Comment < WrxerObject
    xpath 'comment'
    # The original passed IntegerAttribute in the xpath position and gave no
    # element name at all; every other declaration in this project uses
    # (name, xpath, coercion), so the id element is named explicitly here.
    attribute :id, "wp:comment_id", IntegerAttribute
    attribute :author, "wp:comment_author"
    # The export format prefixes the commenter's email, url and IP elements
    # with "comment_"; the unprefixed names did not match those elements.
    attribute :email, "wp:comment_author_email"
    attribute :url, "wp:comment_author_url"
    attribute :ip, "wp:comment_author_IP"
    attribute :published_at, "wp:comment_date_gmt", TimeAttribute
    attribute :content, "wp:comment_content"
    attribute :approved, "wp:comment_approved", IntegerAttribute
    attribute :type, "wp:comment_type"
    attribute :parent, "wp:comment_parent", IntegerAttribute
    attribute :user_id, "wp:comment_user_id", IntegerAttribute
  end
end
@@ -0,0 +1,7 @@
1
module Wrxer
  # The comments belonging to a single <item>.
  class CommentCollection < WrxerCollection
    xpath "item"
    # A path starting with "//" is evaluated from the document root and would
    # select every comment in the export; the leading "." keeps the search
    # beneath the context node.
    # NOTE(review): assumes WrxerCollection evaluates this path against the
    # item node - confirm against its implementation.
    collection :comments, ".//wp:comment", Comment
  end
end
7
+
@@ -0,0 +1,17 @@
1
module Wrxer
  # Maps the <channel> element of an export onto named attributes, including
  # the nested author, image and post collection.
  class Document < WrxerObject
    xpath '//channel'
    attribute :title
    attribute :link
    attribute :description
    attribute :pub_date, "pubDate", TimeAttribute
    attribute :language
    # The element is <wp:wxr_version>; the previous "wp:wrx_version" had the
    # letters transposed and so never matched. The attribute keeps its
    # existing public name (:wrx_version) for compatibility.
    attribute :wrx_version, "wp:wxr_version"
    attribute :base_site_url, "wp:base_site_url"
    attribute :base_blog_url, "wp:base_blog_url"
    attribute :author, "wp:author", Author
    attribute :generator
    attribute :image, "image", Image
    attribute :posts, "item", PostCollection
  end
end
@@ -0,0 +1,8 @@
1
module Wrxer
  # Describes an <image> element through three attributes declared without
  # an explicit xpath or coercion.
  class Image < WrxerObject
    xpath "image"

    attribute :url
    attribute :title
    attribute :link
  end
end
@@ -0,0 +1,20 @@
1
module Wrxer
  # Loads an export file from disk and exposes it as a Document.
  class Parser
    attr_reader :filename, :xml_document, :document

    # Reads and parses the XML immediately. The block form of File.open
    # closes the file once Nokogiri has consumed it.
    #
    # @param filename the path given to File.open
    def initialize(filename)
      @filename = filename

      File.open(filename) do |file|
        @xml_document = Nokogiri::XML(file)
      end
    end

    # @return the Document built from the parsed XML; built once and reused
    def call
      @document ||= Document.call(@xml_document)
    end

    # Short description for consoles and logs.
    #
    # File.path accepts both plain Strings and path-like objects; calling
    # #to_path directly raised NoMethodError for String filenames.
    def inspect
      "#<#{self.class}:0x#{object_id.to_s(16)}> { filename: #{File.path(@filename)}, xml_document: #{@xml_document.class} }"
    end
  end
end
@@ -0,0 +1,24 @@
1
module Wrxer
  # Describes an <item> element: feed-level fields, WordPress-specific
  # fields, and the nested category, postmeta and comment data.
  class Post < WrxerObject
    xpath "item"

    # Plain feed fields.
    attribute :title
    attribute :link
    attribute :pub_date, "pubDate", TimeAttribute
    attribute :creator, "dc:creator"
    attribute :content, "content:encoded"
    attribute :excerpt, "excerpt:encoded"

    # Fields from the wp: namespace.
    attribute :id, "wp:post_id", IntegerAttribute
    attribute :published_at, "wp:post_date_gmt", TimeAttribute
    attribute :comment_status, "wp:comment_status"
    attribute :ping_status, "wp:ping_status"
    attribute :name, "wp:post_name"
    attribute :status, "wp:status"
    attribute :parent, "wp:post_parent", IntegerAttribute
    attribute :menu_order, "wp:menu_order", IntegerAttribute
    attribute :type, "wp:post_type"
    attribute :is_sticky, "wp:is_sticky", IntegerAttribute

    # Nested objects and collections.
    attribute :category, "category", Category
    attribute :postmetas, "item", PostmetaCollection
    attribute :comments, "item", CommentCollection
  end
end
@@ -0,0 +1,6 @@
1
module Wrxer
  # Collection of Post objects, declared against the channel element.
  class PostCollection < WrxerCollection
    xpath "//channel"

    # Each node matching "//item" is represented by a Post.
    collection :posts, "//item", Post
  end
end
@@ -0,0 +1,7 @@
1
module Wrxer
  # Maps a <wp:postmeta> element onto a key/value pair.
  class Postmeta < WrxerObject
    # Coercion for meta values. Values that parse as integers are still
    # returned as Integers (as before); anything else is returned as the raw
    # text instead of raising ArgumentError, since meta values are not
    # guaranteed to be numeric.
    class ValueAttribute < Coercion
      def self.coerce(document)
        text = document.text
        begin
          Integer(text)
        rescue ArgumentError
          text
        end
      end
    end

    xpath 'postmeta'
    attribute :key, "wp:meta_key"
    attribute :value, "wp:meta_value", ValueAttribute
  end
end