mercury_web_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9e898796910ce856c75c95b28281b76d7abc9f5b
4
+ data.tar.gz: 3f374381395d596cea22a88fcce6253cc57433f8
5
+ SHA512:
6
+ metadata.gz: e3fedbc7241d6302552b95aaf477d9de803f042f5e8ab8f041a1d8fd2585908f306f0eba156e167bab219ce0ee3ef06174ea6f79a648533ec86fa4ac5ce7eeea
7
+ data.tar.gz: f966a98563f9f18ac204ebb669e4f515c77ac4c776adf81e6c9af94b24a92881f0fb2aaedb1a6a026b2fb6ab9faf716e1e3d2e58fdb88c73da152eb656d07c19
@@ -0,0 +1,8 @@
1
+ .bundle
2
+ .projections.json
3
+ .ruby-gemset
4
+ .ruby-version
5
+ Gemfile.lock
6
+ doc/
7
+ pkg/
8
+ rdoc/
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,8 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.1
3
+
4
+ Documentation:
5
+ Enabled: false
6
+
7
+ Style/ClassAndModuleChildren:
8
+ Enabled: false
@@ -0,0 +1,17 @@
1
+ language: ruby
2
+
3
+ cache: bundler
4
+
5
+ rvm:
6
+ - 2.1
7
+ - 2.2
8
+
9
+ matrix:
10
+ fast_finish: true
11
+
12
+ before_install:
13
+ - gem update bundler
14
+
15
+ script:
16
+ - bundle exec rake
17
+ - bundle exec danger
@@ -0,0 +1,5 @@
1
+ # Mercury Web Parser Changelog
2
+
3
+ ## 0.1.0
4
+
5
+ * Initial release
@@ -0,0 +1 @@
1
+ commit_lint.check
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org/'
2
+
3
+ gemspec
4
+
5
+ gem 'pry'
6
+
7
+ group :test do
8
+ gem 'rake'
9
+ end
data/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016
4
+
5
+ - Jim Fiorato
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in all
15
+ copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ 0
@@ -0,0 +1,76 @@
1
+ # Mercury Web Parser
2
+
3
+ A simple Ruby wrapper for the [Mercury Web Parser API][mercury-url]
4
+
5
+ [![Build Status][travis-badge]][travis] [![Code Climate][code-climate-badge]][code-climate]
6
+
7
+ [travis-badge]: https://travis-ci.org/theoldreader/mercury_web_parser.png
8
+ [travis]: http://travis-ci.org/theoldreader/mercury_web_parser
9
+ [code-climate-badge]: https://codeclimate.com/github/theoldreader/mercury_web_parser.png
10
+ [code-climate]: https://codeclimate.com/github/theoldreader/mercury_web_parser
11
+
12
+ ## Installation
13
+ Add this line to your application's Gemfile:
14
+
15
+ gem 'mercury_web_parser'
16
+
17
+ And then execute:
18
+
19
+ $ bundle
20
+
21
+ Or install it yourself as:
22
+
23
+ $ gem install mercury_web_parser
24
+
25
+ ## Configuration
26
+ You must first obtain an API token from the fine folks at [Mercury][mercury-url]
27
+ in order to make requests to their Web Parser API.
28
+
29
+ Single token usage
30
+
31
+ MercuryWebParser.api_token = API_TOKEN
32
+
33
+ or set multiple options with a block:
34
+ ```ruby
35
+ MercuryWebParser.configure do |parser|
36
+ parser.api_token = API_TOKEN
37
+ end
38
+ ```
39
+
40
+
41
+ Multiple tokens or multithreaded usage:
42
+
43
+ ```ruby
44
+ client = MercuryWebParser::Client.new(api_token: API_TOKEN)
45
+ ```
46
+
47
+ ## Usage
48
+
49
+ ### Parse
50
+
51
+ Parse a webpage and return its main content:
52
+
53
+ ```ruby
54
+ article = MercuryWebParser.parse("http://sethgodin.typepad.com/seths_blog/2016/11/all-we-have-is-each-other.html")
55
+ => #<MercuryWebParser::Article title="Seth's Blog", author=nil, date_published=nil, dek=nil, lead_image_url="http://www.sethgodin.com/sg/images/og.jpg", content="<div id=\"alpha-inner\" class=\"pkg\"> <div class=\"module-typelist module\">...", next_page_url="http://sethgodin.typepad.com/seths_blog/2016/11/choose-better.html", url="http://sethgodin.typepad.com/seths_blog/2016/11/all-we-have-is-each-other.html", domain="sethgodin.typepad.com", excerpt="", word_count=462, direction="ltr", total_pages=4, rendered_pages=4>
56
+
57
+ article.title
58
+ article.content
59
+ article.author
60
+ article.date_published
61
+ article.lead_image_url
62
+ article.dek
63
+ article.next_page_url
64
+ article.url
65
+ article.domain
66
+ article.excerpt
67
+ article.word_count
68
+ article.direction
69
+ article.total_pages
70
+ article.rendered_pages
71
+ ```
72
+
73
+ ## Inspiration
74
+ Clone of [readability_parser](https://github.com/phildionne/readability_parser) gem
75
+
76
+ [mercury-url]: https://mercury.postlight.com/web-parser/
@@ -0,0 +1,12 @@
1
+ require 'rspec/core/rake_task'
2
+ require 'rubocop/rake_task'
3
+
4
# Spec task with verbose output turned off.
RSpec::Core::RakeTask.new(:spec) { |task| task.verbose = false }

# Lint task; offences are reported together with the cop name.
RuboCop::RakeTask.new(:rubocop) { |task| task.options = ['--display-cop-names'] }

# Plain `rake` depends on the specs and the linter, in that order.
task default: [:spec, :rubocop]
@@ -0,0 +1,46 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://mercury.postlight.com/parser?url=http%3A%2F%2Fwww.google.com
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ User-Agent:
11
+ - MercuryWebParser Ruby Gem 0.1.0
12
+ X-api-key:
13
+ - '1234'
14
+ response:
15
+ status:
16
+ code: 401
17
+ message:
18
+ headers:
19
+ content-length:
20
+ - '26'
21
+ content-type:
22
+ - application/json
23
+ date:
24
+ - Wed, 30 Nov 2016 03:43:36 GMT
25
+ via:
26
+ - 1.1 051be6c213d69e313cabc1e7eee2a118.cloudfront.net (CloudFront)
27
+ x-amz-cf-id:
28
+ - isOCPBGabdbNiXorBpblYgzAa4_BAsbjxzSKCt5SUpdajbSOeCP7rA==
29
+ x-amzn-errortype:
30
+ - UnauthorizedException
31
+ x-amzn-requestid:
32
+ - 2c7f96be-b6af-11e6-a859-2d8d278efab2
33
+ x-cache:
34
+ - Error from cloudfront
35
+ age:
36
+ - '0'
37
+ connection:
38
+ - close
39
+ server:
40
+ - BitBalloon
41
+ body:
42
+ encoding: UTF-8
43
+ string: '{"message":"Unauthorized"}'
44
+ http_version:
45
+ recorded_at: Wed, 30 Nov 2016 03:43:36 GMT
46
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,48 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://mercury.postlight.com/parser?url=http%3A%2F%2Fabc.go.com%2Frobots.txt
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ User-Agent:
11
+ - MercuryWebParser Ruby Gem 0.1.0
12
+ X-api-key:
13
+ - PvKK11D5fKtwj06eD0AevxAdUAWB1Ra3jTaeIgXE
14
+ response:
15
+ status:
16
+ code: 200
17
+ message:
18
+ headers:
19
+ access-control-allow-origin:
20
+ - "*"
21
+ content-length:
22
+ - '4'
23
+ content-type:
24
+ - application/json
25
+ date:
26
+ - Wed, 30 Nov 2016 03:47:28 GMT
27
+ via:
28
+ - 1.1 ea768b6198c0f73b6fb05e3625d27a5d.cloudfront.net (CloudFront)
29
+ x-amz-cf-id:
30
+ - 78gk1qd8sMcETNy4f0guFwWjvT4RZmMn0LqJPCgY3T7ED-jT1V5LCA==
31
+ x-amzn-requestid:
32
+ - b6e3e74a-b6af-11e6-8ccf-b3e2989761ed
33
+ x-amzn-trace-id:
34
+ - Root=1-583e4bd0-8c4ee867e61287c843a16623
35
+ x-cache:
36
+ - Miss from cloudfront
37
+ age:
38
+ - '0'
39
+ connection:
40
+ - close
41
+ server:
42
+ - BitBalloon
43
+ body:
44
+ encoding: UTF-8
45
+ string: 'null'
46
+ http_version:
47
+ recorded_at: Wed, 30 Nov 2016 03:47:28 GMT
48
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,109 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://mercury.postlight.com/parser?url=https%3A%2F%2Fmedium.com%2F%40readability%2Fthe-readability-bookmarking-service-will-shut-down-on-september-30-2016-1641cc18e02b
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ User-Agent:
11
+ - MercuryWebParser Ruby Gem 0.1.0
12
+ X-api-key:
13
+ - PvKK11D5fKtwj06eD0AevxAdUAWB1Ra3jTaeIgXE
14
+ response:
15
+ status:
16
+ code: 200
17
+ message:
18
+ headers:
19
+ access-control-allow-origin:
20
+ - "*"
21
+ content-length:
22
+ - '2706'
23
+ content-type:
24
+ - application/json
25
+ date:
26
+ - Wed, 30 Nov 2016 02:59:26 GMT
27
+ via:
28
+ - 1.1 631585eaa766d0d1b2a196fb1d81ea69.cloudfront.net (CloudFront)
29
+ x-amz-cf-id:
30
+ - yYv44_ZYseF8o8aqui50IgjeckqlcMizRdVqMZXCdX1vnrGH_1Q9iw==
31
+ x-amzn-requestid:
32
+ - 008b8373-b6a9-11e6-aff9-a322a87b8369
33
+ x-amzn-trace-id:
34
+ - Root=1-583e408d-aec2ea51a4d78b1f7da07c83
35
+ x-cache:
36
+ - Miss from cloudfront
37
+ age:
38
+ - '1'
39
+ connection:
40
+ - close
41
+ server:
42
+ - BitBalloon
43
+ body:
44
+ encoding: ASCII-8BIT
45
+ string: !binary |-
46
+ eyJ0aXRsZSI6IlRoZSBSZWFkYWJpbGl0eSBib29rbWFya2luZyBzZXJ2aWNl
47
+ IHdpbGwgc2h1dCBkb3duIG9uIFNlcHRlbWJlciAzMCwgMjAxNi4iLCJjb250
48
+ ZW50IjoiPGRpdj48ZGl2IGNsYXNzPVwic2VjdGlvbi1jb250ZW50XCI+PGRp
49
+ diBjbGFzcz1cInNlY3Rpb24taW5uZXIgc2VjdGlvbkxheW91dC0taW5zZXRD
50
+ b2x1bW5cIj48cCBpZD1cIjI5YmFcIiBjbGFzcz1cImdyYWYgZ3JhZi0tcCBn
51
+ cmFmLWFmdGVyLS1oM1wiPkFmdGVyIG1vcmUgdGhhbiBmaXZlIHllYXJzIG9m
52
+ IG9wZXJhdGlvbiwgdGhlIFJlYWRhYmlsaXR5IGFydGljbGUgYm9va21hcmtp
53
+ bmcvcmVhZC1pdC1sYXRlciBzZXJ2aWNlIHdpbGwgYmUgc2h1dHRpbmcgZG93
54
+ biBhZnRlciA8c3Ryb25nIGNsYXNzPVwibWFya3VwLS1zdHJvbmcgbWFya3Vw
55
+ LS1wLXN0cm9uZ1wiPlNlcHRlbWJlciAzMCwgMjAxNjwvc3Ryb25nPi48L3A+
56
+ PHAgaWQ9XCJiMWQ1XCIgY2xhc3M9XCJncmFmIGdyYWYtLXAgZ3JhZi1hZnRl
57
+ ci0tcFwiPklmIHlvdSYjeDIwMTk7ZCBsaWtlIHRvIHNhdmUgeW91ciBib29r
58
+ bWFya3MsIHBsZWFzZSBmb2xsb3cgdGhlc2UgZGlyZWN0aW9ucyBiZWZvcmUg
59
+ PHN0cm9uZyBjbGFzcz1cIm1hcmt1cC0tc3Ryb25nIG1hcmt1cC0tcC1zdHJv
60
+ bmdcIj5TZXB0ZW1iZXIgMzAsIDIwMTY8L3N0cm9uZz4uIFlvdSBjYW4gZXhw
61
+ b3J0IHlvdXIgYm9va21hcmtzIGJ5IHZpc2l0aW5nIHlvdXIgVG9vbHMgcGFn
62
+ ZSwgc2Nyb2xsaW5nIGRvd24gdG8gdGhlIERhdGEgRXhwb3J0IHNlY3Rpb24s
63
+ IGFuZCBjbGlja2luZyB0aGUgRXhwb3J0IFlvdXIgRGF0YSBidXR0b24uIFlv
64
+ dSYjeDIwMTk7bGwgcmVjZWl2ZSBhbiBlbWFpbCBzb29uIGFmdGVyIHRoYXQg
65
+ Y29udGFpbnMgeW91ciBib29rbWFya3MuIFNpbWlsYXIgc2VydmljZXMgbGlr
66
+ ZSBJbnN0YXBhcGVyIHdpbGwgYWxsb3cgeW91IHRvIGltcG9ydCB5b3VyIGJv
67
+ b2ttYXJrcyBpbnRvIHRoZWlyIHNlcnZpY2UuPC9wPjxwIGlkPVwiNGMxY1wi
68
+ IGNsYXNzPVwiZ3JhZiBncmFmLS1wIGdyYWYtYWZ0ZXItLXBcIj48c3Ryb25n
69
+ IGNsYXNzPVwibWFya3VwLS1zdHJvbmcgbWFya3VwLS1wLXN0cm9uZ1wiPjxl
70
+ bSBjbGFzcz1cIm1hcmt1cC0tZW0gbWFya3VwLS1wLWVtXCI+V2hhdCBpZiBJ
71
+ IHVzZSB0aGUgUmVhZGFiaWxpdHkgUGFyc2VyIEFQST88L2VtPjwvc3Ryb25n
72
+ PjwvcD48cCBpZD1cIjYwOGVcIiBjbGFzcz1cImdyYWYgZ3JhZi0tcCBncmFm
73
+ LWFmdGVyLS1wXCI+VGhlIFJlYWRhYmlsaXR5IFBhcnNlciBBUEkgZm9yIGRl
74
+ dmVsb3BlcnMgd2lsbCBzaHV0IGRvd24gPHN0cm9uZyBjbGFzcz1cIm1hcmt1
75
+ cC0tc3Ryb25nIG1hcmt1cC0tcC1zdHJvbmdcIj5EZWNlbWJlciAxMCwgMjAx
76
+ Njwvc3Ryb25nPi4gSG93ZXZlciwgd2UmI3gyMDE5O3ZlIGdvdCBhIGdyZWF0
77
+ IGFsdGVybmF0aXZlIGZvciB5b3U6IHRoZSA8YSBocmVmPVwiaHR0cHM6Ly9t
78
+ ZXJjdXJ5LnBvc3RsaWdodC5jb20vXCIgY2xhc3M9XCJtYXJrdXAtLWFuY2hv
79
+ ciBtYXJrdXAtLXAtYW5jaG9yXCI+PHN0cm9uZyBjbGFzcz1cIm1hcmt1cC0t
80
+ c3Ryb25nIG1hcmt1cC0tcC1zdHJvbmdcIj5NZXJjdXJ5IFRvb2xraXQ8L3N0
81
+ cm9uZz48L2E+LjwvcD48cCBpZD1cIjFkZWJcIiBjbGFzcz1cImdyYWYgZ3Jh
82
+ Zi0tcCBncmFmLWFmdGVyLS1wXCI+U2luY2UgaXQgbGF1bmNoZWQgYXMgYSBz
83
+ aW1wbGUgYm9va21hcmtsZXQgaW4gMjAwOSwgdGhlIFJlYWRhYmlsaXR5IHBy
84
+ b2plY3QmI3gyMDE5O3MgaW1wYWN0IG9uIHJlYWRpbmcgb24gdGhlIHdlYiBh
85
+ bmQgYmV5b25kIGlzIHVuZGVuaWFibGUuIFdlIGFwcHJlY2lhdGUgeW91ciBs
86
+ b3lhbHR5IGFuZCBzdXBwb3J0IGZvciB0aGUgcGxhdGZvcm0gb3ZlciB0aGUg
87
+ eWVhcnMuPC9wPjxwIGlkPVwiZDBiNlwiIGNsYXNzPVwiZ3JhZiBncmFmLS1w
88
+ IGdyYWYtYWZ0ZXItLXBcIj5XZSB3ZWxjb21lIHlvdXIgcXVlc3Rpb25zIGF0
89
+ IDxhIGhyZWY9XCJtYWlsdG86Y29udGFjdCUyQnJpbEByZWFkYWJpbGl0eS5j
90
+ b21cIiBjbGFzcz1cIm1hcmt1cC0tYW5jaG9yIG1hcmt1cC0tcC1hbmNob3Jc
91
+ Ij5jb250YWN0K3JpbEByZWFkYWJpbGl0eS5jb208L2E+LjwvcD48cCBpZD1c
92
+ ImMwMjRcIiBjbGFzcz1cImdyYWYgZ3JhZi0tcCBncmFmLWFmdGVyLS1wIGdy
93
+ YWYtLWxhc3RcIj48ZW0gY2xhc3M9XCJtYXJrdXAtLWVtIG1hcmt1cC0tcC1l
94
+ bVwiPlRoZSBSZWFkYWJpbGl0eSBUZWFtPC9lbT48L3A+PC9kaXY+PC9kaXY+
95
+ PC9kaXY+IiwiYXV0aG9yIjoiUmVhZGFiaWxpdHkiLCJkYXRlX3B1Ymxpc2hl
96
+ ZCI6IjIwMTYtMDktMDJUMTg6NDM6NTkuODY0WiIsImxlYWRfaW1hZ2VfdXJs
97
+ IjpudWxsLCJkZWsiOm51bGwsIm5leHRfcGFnZV91cmwiOm51bGwsInVybCI6
98
+ Imh0dHBzOi8vbWVkaXVtLmNvbS9AcmVhZGFiaWxpdHkvdGhlLXJlYWRhYmls
99
+ aXR5LWJvb2ttYXJraW5nLXNlcnZpY2Utd2lsbC1zaHV0LWRvd24tb24tc2Vw
100
+ dGVtYmVyLTMwLTIwMTYtMTY0MWNjMThlMDJiIiwiZG9tYWluIjoibWVkaXVt
101
+ LmNvbSIsImV4Y2VycHQiOiJBZnRlciBtb3JlIHRoYW4gZml2ZSB5ZWFycyBv
102
+ ZiBvcGVyYXRpb24sIHRoZSBSZWFkYWJpbGl0eSBhcnRpY2xlIGJvb2ttYXJr
103
+ aW5nL3JlYWQtaXQtbGF0ZXIgc2VydmljZSB3aWxsIGJlIHNodXR0aW5nIGRv
104
+ d24gYWZ0ZXIgU2VwdGVtYmVyIDMw4oCmIiwid29yZF9jb3VudCI6MTUxLCJk
105
+ aXJlY3Rpb24iOiJsdHIiLCJ0b3RhbF9wYWdlcyI6MSwicmVuZGVyZWRfcGFn
106
+ ZXMiOjF9
107
+ http_version:
108
+ recorded_at: Wed, 30 Nov 2016 02:59:26 GMT
109
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,29 @@
1
+ require 'mercury_web_parser/configuration'
2
+ require 'mercury_web_parser/client'
3
+
4
# Top-level namespace and convenience facade for the gem.
#
# The module holds the global configuration (it extends Configuration) and
# forwards every public MercuryWebParser::Client method, such as +parse+, to a
# freshly built client.
module MercuryWebParser
  extend Configuration

  class << self
    # Alias for MercuryWebParser::Client.new
    #
    # @param options [Hash] settings handed to the client constructor
    # @return [MercuryWebParser::Client]
    def new(options = {})
      MercuryWebParser::Client.new(options)
    end

    # Delegate to MercuryWebParser::Client
    #
    # One client is built per call so the respond_to? check and the dispatch
    # target the same object. Only public client methods are forwarded.
    def method_missing(method, *args, &block)
      client = new
      return super unless client.respond_to?(method)
      client.public_send(method, *args, &block)
    end

    # Mirrors method_missing: a name is reported as supported exactly when a
    # client would accept it publicly. Object#respond_to? and Object#method
    # consult this hook, so no respond_to? override is needed.
    def respond_to_missing?(method_name, include_private = false)
      new.respond_to?(method_name) || super
    end
  end
end # MercuryWebParser
@@ -0,0 +1,16 @@
1
module MercuryWebParser
  module API
    # Content endpoint of the Mercury Web Parser API.
    #
    # The including class must provide +get(path, params)+ returning the
    # decoded response body.
    module Content
      # Parse a webpage and return its main content
      # Returns a MercuryWebParser::Article object
      #
      # @param url [String] The URL of an article to return the content for
      # @return [MercuryWebParser::Article]
      # @raise [MercuryWebParser::Error::NotParseable] when the API returns
      #   an empty (null) payload
      def parse(url)
        response = get('', url: url)

        # A bare JSON `null` body may be decoded to nil instead of raising a
        # parsing error; treat it as unparseable rather than building an
        # Article with no attributes.
        if response.nil?
          raise MercuryWebParser::Error::NotParseable, 'Unparseable response'
        end

        MercuryWebParser::Article.new(response)
      end
    end # Content
  end # API
end
@@ -0,0 +1,12 @@
1
+ require 'ostruct'
2
+
3
module MercuryWebParser
  # Parsed article. Built on OpenStruct, so each key of the attributes hash
  # is exposed as a reader on the instance.
  class Article < OpenStruct
    # Builds an article from its attributes.
    #
    # @param article [Hash] attributes of the parsed article
    # @return [MercuryWebParser::Article]
    def initialize(article)
      super(article)
    end
  end # Article
end
@@ -0,0 +1,21 @@
1
+ require 'mercury_web_parser/connection'
2
+ require 'mercury_web_parser/request'
3
+ require 'mercury_web_parser/api/content'
4
+ require 'mercury_web_parser/article'
5
+
6
module MercuryWebParser
  # API client carrying its own copy of the configuration.
  #
  # Settings start from the module-level configuration
  # (MercuryWebParser.options) and are overridden by the options passed to
  # the constructor.
  class Client
    include MercuryWebParser::Connection
    include MercuryWebParser::Request
    include MercuryWebParser::API::Content

    attr_accessor(*Configuration::VALID_CONFIG_KEYS)

    # @param options [Hash] overrides keyed by Configuration::VALID_CONFIG_KEYS
    def initialize(options = {})
      options = MercuryWebParser.options.merge(options)
      # Populate every declared accessor, not only the option keys, so that
      # api_endpoint and user_agent overrides are kept on the instance too.
      Configuration::VALID_CONFIG_KEYS.each do |key|
        public_send("#{key}=", options[key])
      end
    end
  end # Client
end
@@ -0,0 +1,37 @@
1
+ require 'mercury_web_parser/version'
2
+
3
module MercuryWebParser
  # Configuration mixin. Extending a module or class with it adds one
  # accessor per VALID_CONFIG_KEYS entry and resets them to the defaults.
  module Configuration
    VALID_CONNECTION_KEYS = [:api_endpoint, :user_agent].freeze
    VALID_OPTIONS_KEYS = [:api_token].freeze
    VALID_CONFIG_KEYS = (VALID_CONNECTION_KEYS + VALID_OPTIONS_KEYS).freeze

    DEFAULT_API_ENDPOINT = 'https://mercury.postlight.com/parser'.freeze
    DEFAULT_USER_AGENT = "MercuryWebParser Ruby Gem #{MercuryWebParser::VERSION}".freeze # rubocop:disable Metrics/LineLength
    DEFAULT_API_TOKEN = nil

    attr_accessor(*VALID_CONFIG_KEYS)

    # Applies the defaults as soon as the module is extended.
    def self.extended(base)
      base.reset!
    end

    # Convenience method to allow configuration options to be set in a block
    #
    # @yieldparam config [Configuration] the object being configured
    def configure
      yield self
    end

    # Current settings as a Hash keyed by VALID_CONFIG_KEYS.
    #
    # Built from key/value pairs directly; flattening the pairs would break
    # on any setting whose value is itself an Array.
    #
    # @return [Hash{Symbol => Object}]
    def options
      Hash[VALID_CONFIG_KEYS.map { |key| [key, send(key)] }]
    end

    # Restores every setting to its default.
    #
    # @return [true]
    def reset!
      self.api_endpoint = DEFAULT_API_ENDPOINT
      self.user_agent = DEFAULT_USER_AGENT

      self.api_token = DEFAULT_API_TOKEN

      true
    end
  end # Configuration
end
@@ -0,0 +1,42 @@
1
+ require 'faraday'
2
+ require 'faraday_middleware'
3
+
4
module MercuryWebParser
  # Builds the Faraday connection used to talk to the API.
  #
  # Meant to be mixed into an object exposing api_endpoint, user_agent and
  # api_token readers (MercuryWebParser::Client). A reader that returns nil
  # falls back to the module-level configuration.
  module Connection
    # Instantiate a Faraday::Connection
    # @private
    private

    # Returns a Faraday::Connection object
    #
    # @param options [Hash] Faraday options merged over the default +url+
    # @return [Faraday::Connection]
    def connection(options = {})
      defaults = { url: api_endpoint || MercuryWebParser.api_endpoint }

      get_connection(defaults.merge(options))
    end

    # Builds the middleware stack and sets the request headers.
    #
    # @param options [Hash] options passed to Faraday.new
    # @return [Faraday::Connection]
    def get_connection(options)
      conn = Faraday.new(options) do |c|
        # encode request params as "www-form-urlencoded"
        c.use Faraday::Request::UrlEncoded

        c.use FaradayMiddleware::FollowRedirects, limit: 3

        # raise exceptions on 40x, 50x responses
        c.use Faraday::Response::RaiseError

        c.response :json, content_type: /\bjson$/

        c.adapter Faraday.default_adapter
      end

      # Use this object's own settings first so that clients created with
      # different tokens do not all send the module-level token.
      conn.headers[:user_agent] = user_agent || MercuryWebParser.user_agent
      conn.headers[:"x-api-key"] = api_token || MercuryWebParser.api_token

      conn
    end
  end # Connection
end
@@ -0,0 +1,51 @@
1
module MercuryWebParser
  # Base class of every error raised by the gem.
  class Error < StandardError
    # Raised when Mercury returns a 4xx or 500 HTTP status code
    class ClientError < Error
      # Creates a new error from a failed HTTP request.
      #
      # When the wrapped error exposes a response whose status is listed in
      # ERROR_MAP, the mapped error is raised from here instead, using the
      # "message" entry of the response body (or the raw body when it is not
      # a Hash). Otherwise a plain ClientError wrapping +error+ is built.
      #
      # @param error [#response, Object, nil] the underlying error
      # @return [MercuryWebParser::Error::ClientError]
      # @raise [MercuryWebParser::Error] the ERROR_MAP entry for the status
      def initialize(error = nil)
        # Not every failure carries a response (nil argument, plain message,
        # connection-level errors); those have no status to map.
        response = error.respond_to?(:response) ? error.response : nil
        http_error = response ? response[:status].to_i : 0

        if ERROR_MAP.key?(http_error)
          body = response[:body]
          message = body.is_a?(Hash) ? body['message'] : body
          raise ERROR_MAP[http_error], message
        end

        super(error)
      end
    end # ClientError

    # Raised when a request is attempted without the required configuration
    class ConfigurationError < MercuryWebParser::Error; end

    # Raised when there's an error in Faraday
    class RequestError < MercuryWebParser::Error; end

    # Raised when MercuryWebParser returns a 400 HTTP status code
    class BadRequest < MercuryWebParser::Error; end

    # Raised when the response from the parser is null
    class NotParseable < MercuryWebParser::Error; end

    # Raised when MercuryWebParser returns a 401 HTTP status code
    class UnauthorizedRequest < MercuryWebParser::Error; end

    # Raised when MercuryWebParser returns a 403 HTTP status code
    class Forbidden < MercuryWebParser::Error; end

    # Raised when MercuryWebParser returns a 404 HTTP status code
    class NotFound < MercuryWebParser::Error; end

    # Raised when MercuryWebParser returns a 500 HTTP status code
    class InternalServerError < MercuryWebParser::Error; end

    # HTTP status code => error class raised for it
    ERROR_MAP = {
      400 => MercuryWebParser::Error::BadRequest,
      401 => MercuryWebParser::Error::UnauthorizedRequest,
      403 => MercuryWebParser::Error::Forbidden,
      404 => MercuryWebParser::Error::NotFound,
      500 => MercuryWebParser::Error::InternalServerError
    }.freeze
  end # Error
end
@@ -0,0 +1,37 @@
1
+ require 'mercury_web_parser/error'
2
+
3
module MercuryWebParser
  # HTTP request helpers.
  #
  # The including object must provide +api_token+ and +connection+.
  module Request
    # Performs a HTTP Get request
    #
    # @param path [String] path relative to the connection URL
    # @param params [Hash] query parameters
    # @return [Object] the response body
    def get(path, params = {})
      request(:get, path, params)
    end

    private

    # Checks the configuration, then performs the request.
    #
    # @return [Object] the response body
    # @raise [MercuryWebParser::Error::ConfigurationError] when no API token
    #   is set
    def request(method, path, params = {})
      if api_token.nil?
        raise MercuryWebParser::Error::ConfigurationError,
              'Please configure MercuryWebParser.api_token first'
      end

      make_request(method, path, params)
    end

    # Sends the request and translates Faraday failures into
    # MercuryWebParser errors.
    #
    # @return [Object] the response body
    def make_request(method, path, params)
      response = connection.send(method) do |request|
        request.url(path, params)
      end
      response.body
    rescue Faraday::ParsingError
      # Kept first: the most specific Faraday error must win.
      raise MercuryWebParser::Error::NotParseable, 'Unparseable response'
    rescue Faraday::ClientError => error
      # Faraday::ClientError is the canonical constant; the
      # Faraday::Error::ClientError alias is not defined by every Faraday
      # release the gemspec allows.
      raise MercuryWebParser::Error::ClientError, error
    rescue StandardError => error
      raise MercuryWebParser::Error::RequestError, error
    end
  end # Request
end
@@ -0,0 +1,3 @@
1
# Holds the gem's version constant.
module MercuryWebParser
  # Current release, as a frozen "MAJOR.MINOR.PATCH" string.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/mercury_web_parser/version', __FILE__)
3
+
4
# Gem packaging metadata.
Gem::Specification.new do |spec|
  spec.name = 'mercury_web_parser'
  spec.version = MercuryWebParser::VERSION
  spec.summary = 'A simple Ruby wrapper for the Mercury Web Parser API'
  spec.authors = ['Jim Fiorato']
  spec.email = 'mercury_web_parser@theoldreader.com'
  # Same repository name as the Travis and Code Climate links in the README
  # (underscores, not dashes).
  spec.homepage = 'https://github.com/theoldreader/mercury_web_parser'
  spec.license = 'MIT'

  # Package whatever git tracks; test files are the tracked files under
  # test/, spec/ or features/.
  spec.files = `git ls-files`.split("\n")
  spec.require_paths = ['lib']
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")

  spec.required_ruby_version = '>= 2.0'

  spec.add_dependency 'faraday', '>= 0.9'
  spec.add_dependency 'faraday_middleware', '>= 0.9'

  spec.add_development_dependency 'danger'
  spec.add_development_dependency 'danger-commit_lint'
  spec.add_development_dependency 'rspec'
  spec.add_development_dependency 'rubocop'
  spec.add_development_dependency 'vcr'
end
@@ -0,0 +1,93 @@
1
+ require 'spec_helper'
2
+
3
describe MercuryWebParser::Client do
  # Every example starts from the default module configuration.
  after do
    MercuryWebParser.reset!
  end

  context 'with module configuration' do
    before do
      MercuryWebParser.configure do |config|
        MercuryWebParser::Configuration::VALID_CONFIG_KEYS.each do |key|
          config.send("#{key}=", key)
        end
      end
    end

    it 'inherits the module configuration' do
      MercuryWebParser::Configuration::VALID_CONFIG_KEYS.each do |key|
        expect(MercuryWebParser.method(:"#{key}")).to be_a_kind_of(Method)
        expect(MercuryWebParser.send(:"#{key}")).to eq(key)
      end
    end
  end

  context 'with class configuration' do
    let(:configuration) { { api_token: '1234' } }

    it 'overrides the module configuration after initialization' do
      MercuryWebParser.api_token = 'module-token'

      # Build an actual client: the per-instance options must win over the
      # module-level ones without modifying them.
      client = MercuryWebParser::Client.new(configuration)

      MercuryWebParser::Configuration::VALID_OPTIONS_KEYS.each do |key|
        expect(client.send(key)).to eq(configuration[key])
      end
      expect(MercuryWebParser.api_token).to eq('module-token')
    end
  end

  describe '#connection' do
    it 'looks like Faraday connection' do
      expect(subject.send(:connection)).to respond_to(:run_request)
    end
  end

  describe '#request' do
    before do
      MercuryWebParser.api_token = '1234'
    end

    it 'successfully parses a page' do
      VCR.use_cassette('successful') do
        url = 'https://medium.com/@readability/the-readability-bookmarking-service-will-shut-down-on-september-30-2016-1641cc18e02b'
        article = MercuryWebParser.parse(url)
        expect(article.title).to match(/The Readability bookmarking service/)
        expect(article.content).to match(/receive an email/)
        expect(article.author).to eq('Readability')
        expect(article.date_published).to eq('2016-09-02T18:43:59.864Z')
        expect(article.lead_image_url).to be_nil
        expect(article.dek).to be_nil
        expect(article.next_page_url).to be_nil
        expect(article.url).to eq(url)
        expect(article.domain).to eq('medium.com')
        expect(article.excerpt).to match(/more than five years/)
        expect(article.word_count).to eq(151)
        expect(article.direction).to eq('ltr')
        expect(article.total_pages).to eq(1)
        expect(article.rendered_pages).to eq(1)
      end
    end

    # The cassette holds a 401 response, i.e. an HTTP error status rather
    # than a connection failure.
    it 'maps a 401 response to UnauthorizedRequest' do
      VCR.use_cassette('connection_errors') do
        expect { MercuryWebParser.parse('http://www.google.com') }.to(
          raise_error(MercuryWebParser::Error::UnauthorizedRequest)
        )
      end
    end

    it 'catches Mercury Web Parser API errors' do
      VCR.use_cassette('parser_errors') do
        expect { MercuryWebParser.parse('http://abc.go.com/robots.txt') }.to(
          raise_error(MercuryWebParser::Error::NotParseable)
        )
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'spec_helper'
2
+
3
describe MercuryWebParser do
  # Restore the default module configuration after each example.
  after { MercuryWebParser.reset! }

  describe '#new' do
    it 'is a MercuryWebParser::Client' do
      client = MercuryWebParser.new

      expect(client).to be_a_kind_of(MercuryWebParser::Client)
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../lib/mercury_web_parser')
2
+ require 'vcr'
3
+
4
# Replay recorded HTTP interactions through the Faraday hook.
VCR.configure do |config|
  # Anchor the cassette directory to this file rather than to the working
  # directory, so the fixtures are found wherever rspec is started from.
  config.cassette_library_dir =
    File.expand_path('../../fixtures/vcr_cassettes', __FILE__)
  config.hook_into :faraday
end
metadata ADDED
@@ -0,0 +1,170 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mercury_web_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jim Fiorato
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-11-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: faraday
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0.9'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: faraday_middleware
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0.9'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0.9'
41
+ - !ruby/object:Gem::Dependency
42
+ name: danger
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: danger-commit_lint
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: vcr
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description:
112
+ email: mercury_web_parser@theoldreader.com
113
+ executables: []
114
+ extensions: []
115
+ extra_rdoc_files: []
116
+ files:
117
+ - ".gitignore"
118
+ - ".rspec"
119
+ - ".rubocop.yml"
120
+ - ".travis.yml"
121
+ - CHANGELOG.md
122
+ - Dangerfile
123
+ - Gemfile
124
+ - LICENSE
125
+ - README.md
126
+ - Rakefile
127
+ - fixtures/vcr_cassettes/connection_errors.yml
128
+ - fixtures/vcr_cassettes/parser_errors.yml
129
+ - fixtures/vcr_cassettes/successful.yml
130
+ - lib/mercury_web_parser.rb
131
+ - lib/mercury_web_parser/api/content.rb
132
+ - lib/mercury_web_parser/article.rb
133
+ - lib/mercury_web_parser/client.rb
134
+ - lib/mercury_web_parser/configuration.rb
135
+ - lib/mercury_web_parser/connection.rb
136
+ - lib/mercury_web_parser/error.rb
137
+ - lib/mercury_web_parser/request.rb
138
+ - lib/mercury_web_parser/version.rb
139
+ - mercury_web_parser.gemspec
140
+ - spec/mercury_web_parser/client_spec.rb
141
+ - spec/mercury_web_parser_spec.rb
142
+ - spec/spec_helper.rb
143
+ homepage: http://github.com/theoldreader/mercury-web-parser
144
+ licenses:
145
+ - MIT
146
+ metadata: {}
147
+ post_install_message:
148
+ rdoc_options: []
149
+ require_paths:
150
+ - lib
151
+ required_ruby_version: !ruby/object:Gem::Requirement
152
+ requirements:
153
+ - - ">="
154
+ - !ruby/object:Gem::Version
155
+ version: '2.0'
156
+ required_rubygems_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - ">="
159
+ - !ruby/object:Gem::Version
160
+ version: '0'
161
+ requirements: []
162
+ rubyforge_project:
163
+ rubygems_version: 2.4.8
164
+ signing_key:
165
+ specification_version: 4
166
+ summary: A simple Ruby wrapper for the Mercury Web Parser API
167
+ test_files:
168
+ - spec/mercury_web_parser/client_spec.rb
169
+ - spec/mercury_web_parser_spec.rb
170
+ - spec/spec_helper.rb