wrxer 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NzliM2Y1YTA1YzY4MTY1MDM5OWMwZGFhMjQxMzY2NjVjNDVlODVlOA==
5
+ data.tar.gz: !binary |-
6
+ NWU3NDkzNDUxMGI2Njg5NzA5M2ViYTIxMWNlMjc4ZTg3ZDM5MTQ3Nw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ODVjYTNkYzA3YzdhNTQwMTFhOTlmZjFiMDc2NmM1MzFlZGVkODVmOTU2MjFk
10
+ MjJhOGQ4NTRiMjVmMTljNzJjMjlkMDBkNGNjMzdkNWFlNDViNDhhYjI0MmY4
11
+ YTg5MjAwNDAyYTBhMTRmZTVkNWNlYTM1ODlmMTUwOGJjOGYyZDc=
12
+ data.tar.gz: !binary |-
13
+ MjY4MTEyNzFmMTk2OTJkMTFmOGQ4N2M3N2QyZTgwNzM0ZTlhNDM0NGNjNGVl
14
+ NTRmMWUxNzc3ZTQzNmQ4MWE3NjdkZjhmYjE3MDE3Mjc3ZGY0MzRmYzYwZWU0
15
+ NWVlMWUzYTNmNDE1MTlkOWViZWEwYzExNWY1NGYzNGQ0YTM3OWY=
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.0
4
+ deploy:
5
+ provider: rubygems
6
+ on:
7
+ tags: true
8
+ api_key:
9
+ secure: HVIv1E7b3qrAKtX6wJBU7KXBKWvKZt3lcBt/rxbOA1CLjPfT3cXwgaBViD0h+RRUnlsAvsOBJqRTzkjdoAH9BBnF0UqAHaVc2q1OO/n3MWB78nxYkqINBEIMNeyjA3exGgRoCxpl9zN62ChAeNkULe7shAlBngP2S3HLwrVgIhE=
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in wrxer.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Patrick Schmitz
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,120 @@
1
+ # Wrxer
2
+
3
+ This gem is a simple parser for Wordpress eXtended RSS. At the moment it
4
+ supports many of the parameters related to posts, but it doesn't currently support
5
+ page exports.
6
+
7
+ The document and its attributes can be easily converted to json or a ruby hash
8
+ by calling `#to_hash` or `#to_json`. Collections like PostCollection,
9
+ PostmetaCollection, and CommentCollection are returned as lazy enumerables
10
+ when converting the document to a hash or json, but can be enumerated to
11
+ retrieve the hash or json objects.
12
+
13
+ ## Installation
14
+
15
+ Add this line to your application's Gemfile:
16
+
17
+ ```ruby
18
+ gem 'wrxer'
19
+ ```
20
+
21
+ And then execute:
22
+
23
+ $ bundle
24
+
25
+ Or install it yourself as:
26
+
27
+ $ gem install wrxer
28
+
29
+ ## Usage
30
+
31
+ Wrxer can be used to parse posts from a Wordpress export. Take a look at
32
+ `lib/wrxer/post` to see which attributes are supported.
33
+
34
+ ### Parse from File
35
+
36
+ ```ruby
37
+ document = Wrxer.parse("dir/to/export.xml")
38
+ => #<Wrxer::Document:0x3fd8091d3488> Attributes: ...
39
+ ```
40
+
41
+ ### Parse from URI
42
+
43
+ ```ruby
44
+ document = Wrxer.parse_uri("https://raw.githubusercontent.com/bullfight/wrxer/master/spec/fixtures/wrx.xml")
45
+ => #<Wrxer::Document:0x3fd8091d3488> Attributes: ...
46
+ ```
47
+
48
+ ### Reading a parsed document
49
+
50
+ ```ruby
51
+ document = Wrxer.parse("export.xml")
52
+ => #<Wrxer::Document:0x3fd8091d3488> Attributes: {
53
+ "title": "Wrxer News",
54
+ "link": "https://wrxernews.wordpress.com",
55
+ "description": "The Most Reliable Source For Wrxer News Since 2007.",
56
+ "pub_date": "2015-03-24 21:18:58 +0000",
57
+ "language": "en",
58
+ "wrx_version": null,
59
+ "base_site_url": "http://wordpress.com/",
60
+ "base_blog_url": "https://wrxernews.wordpress.com",
61
+ "author": {
62
+ "login": "wrxernews",
63
+ "email": null,
64
+ "display_name": "Wrxer News",
65
+ "first_name": "",
66
+ "last_name": ""
67
+ },
68
+ "generator": "http://wordpress.com/",
69
+ "image": {
70
+ "url": "https://secure.gravatar.com/blavatar/foobar",
71
+ "title": " » Wrxer News",
72
+ "link": "https://wrxernews.wordpress.com"
73
+ },
74
+ "posts": {
75
+ "data": "#<Enumerator::Lazy:0x007fb01303ee20>"
76
+ }
77
+ }
78
+
79
+ post = document.posts.first
80
+ => #<Wrxer::Post:0x3fd4799693ac> Attributes: {
81
+ "title": "Welcome To Wrxer News.",
82
+ "link": "https://wrxernews.wordpress.com/2007/11/17/welcome-to-wrxer-news/",
83
+ "pub_date": "2007-11-17 21:30:51 +0000",
84
+ "creator": "wrxernews",
85
+ "content": "Welcome to <strong>Wrxer News</strong> - The most up-to-date and reliable source for Wrxer news.",
86
+ "excerpt": "Excerpt Text",
87
+ "id": 3,
88
+ "published_at": "2007-11-17 21:30:51 -0800",
89
+ "comment_status": "open",
90
+ "ping_status": "open",
91
+ "name": "welcome-to-wrxer-news",
92
+ "status": "publish",
93
+ "parent": 0,
94
+ "menu_order": 0,
95
+ "type": "post",
96
+ "is_sticky": 0,
97
+ "category": {
98
+ "domain": "category",
99
+ "nicename": "wrxer-news",
100
+ "body": "Wrxer News"
101
+ },
102
+ "postmetas": {
103
+ "data": "#<Enumerator::Lazy:0x007fa8f315a970>"
104
+ },
105
+ "comments": {
106
+ "data": "#<Enumerator::Lazy:0x007fa8f315a3f8>"
107
+ }
108
+ }
109
+ post.title
110
+ => "Welcome To Wrxer News."
111
+
112
+ ```
113
+
114
+ ## Contributing
115
+
116
+ 1. Fork it ( https://github.com/[my-github-username]/wrxer/fork )
117
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
118
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
119
+ 4. Push to the branch (`git push origin my-new-feature`)
120
+ 5. Create a new Pull Request
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
# Registers a `spec` rake task that runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` invocation runs the specs.
task default: :spec
7
+
@@ -0,0 +1,36 @@
1
+ require "wrxer/version"
2
+ require 'nokogiri'
3
+ require 'time'
4
+ require 'json'
5
+ require 'open-uri'
6
+
7
+
8
+ require 'wrxer/coercion'
9
+ require 'wrxer/attribute'
10
+ require 'wrxer/wrxer_object'
11
+ require 'wrxer/wrxer_collection'
12
+
13
+ require 'wrxer/category'
14
+ require 'wrxer/postmeta'
15
+ require 'wrxer/postmeta_collection'
16
+ require 'wrxer/comment'
17
+ require 'wrxer/comment_collection'
18
+
19
+ require 'wrxer/post'
20
+ require 'wrxer/post_collection'
21
+ require 'wrxer/author'
22
+ require 'wrxer/image'
23
+ require 'wrxer/document'
24
+
25
+ require 'wrxer/parser'
26
+ require 'wrxer/uri_parser'
27
+
28
# Top-level entry points of the gem.
module Wrxer
  class << self
    # Builds a Parser for +file+ and returns the result of calling it.
    def parse(file)
      parser = Parser.new(file)
      parser.call
    end

    # Builds a URIParser for +uri+ and returns the result of calling it.
    def parse_uri(uri)
      parser = URIParser.new(uri)
      parser.call
    end
  end
end
@@ -0,0 +1,7 @@
1
module Wrxer
  # Pairs an attribute name with the xpath used to locate it and the
  # coercion used to convert what is found.
  class Attribute < Struct.new(:name, :xpath, :coercion)
    # Delegates to the coercion, handing it this attribute's xpath.
    # The +params+ argument is accepted but not consulted.
    def call(document, params = {})
      options = { xpath: xpath }
      coercion.call(document, options)
    end
  end
end
@@ -0,0 +1,10 @@
1
module Wrxer
  # Author information read from a <wp:author> element.
  class Author < WrxerObject
    xpath 'wp:author'
    attribute :login, "wp:author_login"
    # Was "wp:autor_email" (typo), which never matched an element, so
    # email was always nil.
    attribute :email, "wp:author_email"
    attribute :display_name, "wp:author_display_name"
    attribute :first_name, "wp:author_first_name"
    attribute :last_name, "wp:author_last_name"
  end
end
@@ -0,0 +1,8 @@
1
module Wrxer
  # A <category> element: two values come from the element's XML
  # attributes, the third from its child content.
  class Category < WrxerObject
    xpath "category"

    # Read from XML attributes on the element itself.
    attribute :domain,   "domain",   ElementAttribute
    attribute :nicename, "nicename", ElementAttribute

    # Read from the element's children; no xpath is needed.
    attribute :body, nil, ChildAttribute
  end
end
@@ -0,0 +1,60 @@
1
module Wrxer
  # Base class for coercions: locates a node and converts it to a Ruby value.
  class Coercion
    # Stores the xpath that identifies this coercion's own element.
    def self.xpath(value)
      @xpath = value
    end

    # Resolves the target node and coerces it.
    #
    # The class-level xpath wins over params[:xpath]. When +document+ is not
    # already the target element (its name differs from the xpath), the
    # target is looked up beneath it with at_xpath.
    #
    # Returns nil when no node is found, otherwise the result of .coerce.
    def self.call(document, params = {})
      root = @xpath || params[:xpath]
      document = document.at_xpath(root) unless document.name == root

      document.nil? ? nil : coerce(document)
    end

    # Default coercion: wrap the node in an instance of this class.
    def self.coerce(document)
      new(document)
    end
  end

  # Returns the node's text unchanged.
  class TextAttribute < Coercion
    def self.coerce(document)
      document.text
    end
  end

  # Returns the node's text parsed as a decimal integer.
  class IntegerAttribute < Coercion
    # Raises ArgumentError when the text is not a valid base-10 integer.
    def self.coerce(document)
      # Explicit base 10: without it Integer() infers the radix from the
      # prefix, so "08" raises and "010" is read as octal 8.
      Integer(document.text, 10)
    end
  end

  # Returns the node's text parsed with Time.parse.
  class TimeAttribute < Coercion
    def self.coerce(document)
      Time.parse(document.text)
    end
  end

  # Reads an XML attribute of the given node; params[:xpath] is used as
  # the attribute name rather than as a path.
  class ElementAttribute < Coercion
    # Returns nil when the node has no attribute with that name.
    def self.call(document, params = {})
      element = document.attributes[params[:xpath].to_s]
      element.nil? ? nil : coerce(element)
    end

    def self.coerce(document)
      document.value
    end
  end

  # Reads the text of the given node's children; params are not consulted.
  class ChildAttribute < Coercion
    # Returns nil when the node has no children.
    def self.call(document, params = {})
      child = document.children
      child.empty? ? nil : coerce(child)
    end

    def self.coerce(document)
      document.text
    end
  end
end
@@ -0,0 +1,16 @@
1
module Wrxer
  # A single comment read from a <wp:comment> element.
  class Comment < WrxerObject
    xpath 'comment'
    # Previously `attribute :id, IntegerAttribute`, which put the coercion
    # class in the xpath position; name the element explicitly like the
    # other attributes do.
    attribute :id, "wp:comment_id", IntegerAttribute
    attribute :author, "wp:comment_author"
    # WordPress exports these under the wp:comment_author_* names; the
    # shorter wp:author_* names used before do not occur inside a comment.
    attribute :email, "wp:comment_author_email"
    attribute :url, "wp:comment_author_url"
    attribute :ip, "wp:comment_author_IP"
    attribute :published_at, "wp:comment_date_gmt", TimeAttribute
    attribute :content, "wp:comment_content"
    attribute :approved, "wp:comment_approved", IntegerAttribute
    attribute :type, "wp:comment_type"
    attribute :parent, "wp:comment_parent", IntegerAttribute
    attribute :user_id, "wp:comment_user_id", IntegerAttribute
  end
end
@@ -0,0 +1,7 @@
1
module Wrxer
  # Collection of Comment objects found under an <item> element.
  class CommentCollection < WrxerCollection
    xpath "item"
    # Relative path (leading "."): a bare "//wp:comment" is an absolute
    # XPath that selects from the document root, so it would match every
    # comment in the export rather than only this item's.
    # NOTE(review): assumes WrxerCollection evaluates this path against the
    # item node — confirm against WrxerCollection.
    collection :comments, ".//wp:comment", Comment
  end
end
7
+
@@ -0,0 +1,17 @@
1
module Wrxer
  # The export as a whole, rooted at the <channel> element.
  class Document < WrxerObject
    xpath '//channel'
    attribute :title
    attribute :link
    attribute :description
    attribute :pub_date, "pubDate", TimeAttribute
    attribute :language
    # The attribute keeps its public name, but the element WordPress writes
    # is <wp:wxr_version>; the previous "wp:wrx_version" path never matched,
    # so the value was always nil.
    attribute :wrx_version, "wp:wxr_version"
    attribute :base_site_url, "wp:base_site_url"
    attribute :base_blog_url, "wp:base_blog_url"
    attribute :author, "wp:author", Author
    attribute :generator
    attribute :image, "image", Image
    attribute :posts, "item", PostCollection
  end
end
@@ -0,0 +1,8 @@
1
module Wrxer
  # The channel's <image> element.
  class Image < WrxerObject
    xpath "image"

    # None of these give an explicit xpath or coercion.
    attribute :url
    attribute :title
    attribute :link
  end
end
@@ -0,0 +1,20 @@
1
module Wrxer
  # Reads an export file from disk into a Nokogiri XML document and turns
  # it into a Wrxer::Document on demand.
  class Parser
    attr_reader :filename, :xml_document, :document

    # filename - anything File.open accepts (a String path or a path-like
    #            object such as Pathname).
    #
    # The file is parsed immediately; the block form of File.open closes
    # the handle afterwards.
    def initialize(filename)
      @filename = filename

      File.open(filename) do |file|
        @xml_document = Nokogiri::XML(file)
      end
    end

    # Builds the Document on first use and returns the memoized result
    # thereafter.
    def call
      @document ||= Document.call(@xml_document)
    end

    # Short description showing the source path and the parsed document's
    # class.
    def inspect
      # A plain String has no #to_path, so only call it on path-like
      # objects; otherwise inspecting a parser built from a String raised
      # NoMethodError.
      path = @filename.respond_to?(:to_path) ? @filename.to_path : @filename
      "#<#{self.class}:0x#{object_id.to_s(16)}> { filename: #{path}, xml_document: #{@xml_document.class} }"
    end
  end
end
@@ -0,0 +1,24 @@
1
module Wrxer
  # A single <item> of the export.
  class Post < WrxerObject
    xpath "item"

    # Standard RSS and namespaced content elements.
    attribute :title
    attribute :link
    attribute :pub_date, "pubDate", TimeAttribute
    attribute :creator, "dc:creator"
    attribute :content, "content:encoded"
    attribute :excerpt, "excerpt:encoded"

    # WordPress-specific (wp:) elements.
    attribute :id, "wp:post_id", IntegerAttribute
    attribute :published_at, "wp:post_date_gmt", TimeAttribute
    attribute :comment_status, "wp:comment_status"
    attribute :ping_status, "wp:ping_status"
    attribute :name, "wp:post_name"
    attribute :status, "wp:status"
    attribute :parent, "wp:post_parent", IntegerAttribute
    attribute :menu_order, "wp:menu_order", IntegerAttribute
    attribute :type, "wp:post_type"
    attribute :is_sticky, "wp:is_sticky", IntegerAttribute

    # Nested objects and collections built from the same item.
    attribute :category, "category", Category
    attribute :postmetas, "item", PostmetaCollection
    attribute :comments, "item", CommentCollection
  end
end
@@ -0,0 +1,6 @@
1
module Wrxer
  # Collection of Post objects, declared against the channel element.
  class PostCollection < WrxerCollection
    xpath '//channel'

    # Members are selected with "//item" and built as Post.
    collection :posts, '//item', Post
  end
end
@@ -0,0 +1,7 @@
1
module Wrxer
  # A key/value pair read from a postmeta element.
  class Postmeta < WrxerObject
    xpath "postmeta"

    attribute :key, "wp:meta_key"
    # NOTE(review): IntegerAttribute raises when the text is not an
    # integer; confirm that every wp:meta_value in supported exports is
    # numeric.
    attribute :value, "wp:meta_value", IntegerAttribute
  end
end