mercury_web_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9e898796910ce856c75c95b28281b76d7abc9f5b
4
+ data.tar.gz: 3f374381395d596cea22a88fcce6253cc57433f8
5
+ SHA512:
6
+ metadata.gz: e3fedbc7241d6302552b95aaf477d9de803f042f5e8ab8f041a1d8fd2585908f306f0eba156e167bab219ce0ee3ef06174ea6f79a648533ec86fa4ac5ce7eeea
7
+ data.tar.gz: f966a98563f9f18ac204ebb669e4f515c77ac4c776adf81e6c9af94b24a92881f0fb2aaedb1a6a026b2fb6ab9faf716e1e3d2e58fdb88c73da152eb656d07c19
@@ -0,0 +1,8 @@
1
+ .bundle
2
+ .projections.json
3
+ .ruby-gemset
4
+ .ruby-version
5
+ Gemfile.lock
6
+ doc/
7
+ pkg/
8
+ rdoc/
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,8 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.1
3
+
4
+ Documentation:
5
+ Enabled: false
6
+
7
+ Style/ClassAndModuleChildren:
8
+ Enabled: false
@@ -0,0 +1,17 @@
1
+ language: ruby
2
+
3
+ cache: bundler
4
+
5
+ rvm:
6
+ - 2.1
7
+ - 2.2
8
+
9
+ matrix:
10
+ fast_finish: true
11
+
12
+ before_install:
13
+ - gem update bundler
14
+
15
+ script:
16
+ - bundle exec rake
17
+ - bundle exec danger
@@ -0,0 +1,5 @@
1
+ # Mercury Web Parser Changelog
2
+
3
+ ## 0.1.0
4
+
5
+ * Initial release
@@ -0,0 +1 @@
1
+ commit_lint.check
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
# Gems are resolved from the public RubyGems index.
source 'https://rubygems.org/'

# Runtime and development dependencies are declared in the gemspec.
gemspec

# Interactive console, available in every Bundler group.
gem 'pry'

# Task runner, only needed by the test group.
gem 'rake', group: :test
data/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016
4
+
5
+ - Jim Fiorato
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in all
15
+ copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ 0
@@ -0,0 +1,76 @@
1
+ # Mercury Web Parser
2
+
3
+ A simple Ruby wrapper for the [Mercury Web Parser API][mercury-url]
4
+
5
+ [![Build Status][travis-badge]][travis] [![Code Climate][code-climate-badge]][code-climate]
6
+
7
+ [travis-badge]: https://travis-ci.org/theoldreader/mercury_web_parser.png
8
+ [travis]: http://travis-ci.org/theoldreader/mercury_web_parser
9
+ [code-climate-badge]: https://codeclimate.com/github/theoldreader/mercury_web_parser.png
10
+ [code-climate]: https://codeclimate.com/github/theoldreader/mercury_web_parser
11
+
12
+ ## Installation
13
+ Add this line to your application's Gemfile:
14
+
15
+ gem 'mercury_web_parser'
16
+
17
+ And then execute:
18
+
19
+ $ bundle
20
+
21
+ Or install it yourself as:
22
+
23
+ $ gem install mercury_web_parser
24
+
25
+ ## Configuration
26
+ You must first obtain an API token from the fine folks at [Mercury][mercury-url]
27
+ in order to make requests to their Web Parser API.
28
+
29
+ Single token usage
30
+
31
+ MercuryWebParser.api_token = API_TOKEN
32
+
33
+ or set multiple options with a block:
34
+ ```ruby
35
+ MercuryWebParser.configure do |parser|
36
+ parser.api_token = API_TOKEN
37
+ end
38
+ ```
39
+
40
+
41
+ Multiple tokens or multithreaded usage:
42
+
43
+ ```ruby
44
+ client = MercuryWebParser::Client.new(api_token: API_TOKEN)
45
+ ```
46
+
47
+ ## Usage
48
+
49
+ ### Parse
50
+
51
+ Parse a webpage and return its main content:
52
+
53
+ ```ruby
54
+ article = MercuryWebParser.parse("http://sethgodin.typepad.com/seths_blog/2016/11/all-we-have-is-each-other.html")
55
+ => #<MercuryWebParser::Article title="Seth's Blog", author=nil, date_published=nil, dek=nil, lead_image_url="http://www.sethgodin.com/sg/images/og.jpg", content="<div id=\"alpha-inner\" class=\"pkg\"> <div class=\"module-typelist module\">...", next_page_url="http://sethgodin.typepad.com/seths_blog/2016/11/choose-better.html", url="http://sethgodin.typepad.com/seths_blog/2016/11/all-we-have-is-each-other.html", domain="sethgodin.typepad.com", excerpt="", word_count=462, direction="ltr", total_pages=4, rendered_pages=4>
56
+
57
+ article.title
58
+ article.content
59
+ article.author
60
+ article.date_published
61
+ article.lead_image_url
62
+ article.dek
63
+ article.next_page_url
64
+ article.url
65
+ article.domain
66
+ article.excerpt
67
+ article.word_count
68
+ article.direction
69
+ article.total_pages
70
+ article.rendered_pages
71
+ ```
72
+
73
+ ## Inspiration
74
+ Clone of [readability_parser](https://github.com/phildionne/readability_parser) gem
75
+
76
+ [mercury-url]: https://mercury.postlight.com/web-parser/
@@ -0,0 +1,12 @@
1
+ require 'rspec/core/rake_task'
2
+ require 'rubocop/rake_task'
3
+
4
# `rake spec` runs the RSpec suite without echoing the rspec command line.
RSpec::Core::RakeTask.new(:spec) { |spec_task| spec_task.verbose = false }

# `rake rubocop` lints the code base and prints the name of each offended cop.
RuboCop::RakeTask.new(:rubocop) do |lint_task|
  lint_task.options = ['--display-cop-names']
end

# A bare `rake` runs the specs first, then the linter.
task default: [:spec, :rubocop]
@@ -0,0 +1,46 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://mercury.postlight.com/parser?url=http%3A%2F%2Fwww.google.com
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ User-Agent:
11
+ - MercuryWebParser Ruby Gem 0.1.0
12
+ X-api-key:
13
+ - '1234'
14
+ response:
15
+ status:
16
+ code: 401
17
+ message:
18
+ headers:
19
+ content-length:
20
+ - '26'
21
+ content-type:
22
+ - application/json
23
+ date:
24
+ - Wed, 30 Nov 2016 03:43:36 GMT
25
+ via:
26
+ - 1.1 051be6c213d69e313cabc1e7eee2a118.cloudfront.net (CloudFront)
27
+ x-amz-cf-id:
28
+ - isOCPBGabdbNiXorBpblYgzAa4_BAsbjxzSKCt5SUpdajbSOeCP7rA==
29
+ x-amzn-errortype:
30
+ - UnauthorizedException
31
+ x-amzn-requestid:
32
+ - 2c7f96be-b6af-11e6-a859-2d8d278efab2
33
+ x-cache:
34
+ - Error from cloudfront
35
+ age:
36
+ - '0'
37
+ connection:
38
+ - close
39
+ server:
40
+ - BitBalloon
41
+ body:
42
+ encoding: UTF-8
43
+ string: '{"message":"Unauthorized"}'
44
+ http_version:
45
+ recorded_at: Wed, 30 Nov 2016 03:43:36 GMT
46
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,48 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://mercury.postlight.com/parser?url=http%3A%2F%2Fabc.go.com%2Frobots.txt
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ User-Agent:
11
+ - MercuryWebParser Ruby Gem 0.1.0
12
+ X-api-key:
13
+ - PvKK11D5fKtwj06eD0AevxAdUAWB1Ra3jTaeIgXE
14
+ response:
15
+ status:
16
+ code: 200
17
+ message:
18
+ headers:
19
+ access-control-allow-origin:
20
+ - "*"
21
+ content-length:
22
+ - '4'
23
+ content-type:
24
+ - application/json
25
+ date:
26
+ - Wed, 30 Nov 2016 03:47:28 GMT
27
+ via:
28
+ - 1.1 ea768b6198c0f73b6fb05e3625d27a5d.cloudfront.net (CloudFront)
29
+ x-amz-cf-id:
30
+ - 78gk1qd8sMcETNy4f0guFwWjvT4RZmMn0LqJPCgY3T7ED-jT1V5LCA==
31
+ x-amzn-requestid:
32
+ - b6e3e74a-b6af-11e6-8ccf-b3e2989761ed
33
+ x-amzn-trace-id:
34
+ - Root=1-583e4bd0-8c4ee867e61287c843a16623
35
+ x-cache:
36
+ - Miss from cloudfront
37
+ age:
38
+ - '0'
39
+ connection:
40
+ - close
41
+ server:
42
+ - BitBalloon
43
+ body:
44
+ encoding: UTF-8
45
+ string: 'null'
46
+ http_version:
47
+ recorded_at: Wed, 30 Nov 2016 03:47:28 GMT
48
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,109 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://mercury.postlight.com/parser?url=https%3A%2F%2Fmedium.com%2F%40readability%2Fthe-readability-bookmarking-service-will-shut-down-on-september-30-2016-1641cc18e02b
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ User-Agent:
11
+ - MercuryWebParser Ruby Gem 0.1.0
12
+ X-api-key:
13
+ - PvKK11D5fKtwj06eD0AevxAdUAWB1Ra3jTaeIgXE
14
+ response:
15
+ status:
16
+ code: 200
17
+ message:
18
+ headers:
19
+ access-control-allow-origin:
20
+ - "*"
21
+ content-length:
22
+ - '2706'
23
+ content-type:
24
+ - application/json
25
+ date:
26
+ - Wed, 30 Nov 2016 02:59:26 GMT
27
+ via:
28
+ - 1.1 631585eaa766d0d1b2a196fb1d81ea69.cloudfront.net (CloudFront)
29
+ x-amz-cf-id:
30
+ - yYv44_ZYseF8o8aqui50IgjeckqlcMizRdVqMZXCdX1vnrGH_1Q9iw==
31
+ x-amzn-requestid:
32
+ - 008b8373-b6a9-11e6-aff9-a322a87b8369
33
+ x-amzn-trace-id:
34
+ - Root=1-583e408d-aec2ea51a4d78b1f7da07c83
35
+ x-cache:
36
+ - Miss from cloudfront
37
+ age:
38
+ - '1'
39
+ connection:
40
+ - close
41
+ server:
42
+ - BitBalloon
43
+ body:
44
+ encoding: ASCII-8BIT
45
+ string: !binary |-
46
+ eyJ0aXRsZSI6IlRoZSBSZWFkYWJpbGl0eSBib29rbWFya2luZyBzZXJ2aWNl
47
+ IHdpbGwgc2h1dCBkb3duIG9uIFNlcHRlbWJlciAzMCwgMjAxNi4iLCJjb250
48
+ ZW50IjoiPGRpdj48ZGl2IGNsYXNzPVwic2VjdGlvbi1jb250ZW50XCI+PGRp
49
+ diBjbGFzcz1cInNlY3Rpb24taW5uZXIgc2VjdGlvbkxheW91dC0taW5zZXRD
50
+ b2x1bW5cIj48cCBpZD1cIjI5YmFcIiBjbGFzcz1cImdyYWYgZ3JhZi0tcCBn
51
+ cmFmLWFmdGVyLS1oM1wiPkFmdGVyIG1vcmUgdGhhbiBmaXZlIHllYXJzIG9m
52
+ IG9wZXJhdGlvbiwgdGhlIFJlYWRhYmlsaXR5IGFydGljbGUgYm9va21hcmtp
53
+ bmcvcmVhZC1pdC1sYXRlciBzZXJ2aWNlIHdpbGwgYmUgc2h1dHRpbmcgZG93
54
+ biBhZnRlciA8c3Ryb25nIGNsYXNzPVwibWFya3VwLS1zdHJvbmcgbWFya3Vw
55
+ LS1wLXN0cm9uZ1wiPlNlcHRlbWJlciAzMCwgMjAxNjwvc3Ryb25nPi48L3A+
56
+ PHAgaWQ9XCJiMWQ1XCIgY2xhc3M9XCJncmFmIGdyYWYtLXAgZ3JhZi1hZnRl
57
+ ci0tcFwiPklmIHlvdSYjeDIwMTk7ZCBsaWtlIHRvIHNhdmUgeW91ciBib29r
58
+ bWFya3MsIHBsZWFzZSBmb2xsb3cgdGhlc2UgZGlyZWN0aW9ucyBiZWZvcmUg
59
+ PHN0cm9uZyBjbGFzcz1cIm1hcmt1cC0tc3Ryb25nIG1hcmt1cC0tcC1zdHJv
60
+ bmdcIj5TZXB0ZW1iZXIgMzAsIDIwMTY8L3N0cm9uZz4uIFlvdSBjYW4gZXhw
61
+ b3J0IHlvdXIgYm9va21hcmtzIGJ5IHZpc2l0aW5nIHlvdXIgVG9vbHMgcGFn
62
+ ZSwgc2Nyb2xsaW5nIGRvd24gdG8gdGhlIERhdGEgRXhwb3J0IHNlY3Rpb24s
63
+ IGFuZCBjbGlja2luZyB0aGUgRXhwb3J0IFlvdXIgRGF0YSBidXR0b24uIFlv
64
+ dSYjeDIwMTk7bGwgcmVjZWl2ZSBhbiBlbWFpbCBzb29uIGFmdGVyIHRoYXQg
65
+ Y29udGFpbnMgeW91ciBib29rbWFya3MuIFNpbWlsYXIgc2VydmljZXMgbGlr
66
+ ZSBJbnN0YXBhcGVyIHdpbGwgYWxsb3cgeW91IHRvIGltcG9ydCB5b3VyIGJv
67
+ b2ttYXJrcyBpbnRvIHRoZWlyIHNlcnZpY2UuPC9wPjxwIGlkPVwiNGMxY1wi
68
+ IGNsYXNzPVwiZ3JhZiBncmFmLS1wIGdyYWYtYWZ0ZXItLXBcIj48c3Ryb25n
69
+ IGNsYXNzPVwibWFya3VwLS1zdHJvbmcgbWFya3VwLS1wLXN0cm9uZ1wiPjxl
70
+ bSBjbGFzcz1cIm1hcmt1cC0tZW0gbWFya3VwLS1wLWVtXCI+V2hhdCBpZiBJ
71
+ IHVzZSB0aGUgUmVhZGFiaWxpdHkgUGFyc2VyIEFQST88L2VtPjwvc3Ryb25n
72
+ PjwvcD48cCBpZD1cIjYwOGVcIiBjbGFzcz1cImdyYWYgZ3JhZi0tcCBncmFm
73
+ LWFmdGVyLS1wXCI+VGhlIFJlYWRhYmlsaXR5IFBhcnNlciBBUEkgZm9yIGRl
74
+ dmVsb3BlcnMgd2lsbCBzaHV0IGRvd24gPHN0cm9uZyBjbGFzcz1cIm1hcmt1
75
+ cC0tc3Ryb25nIG1hcmt1cC0tcC1zdHJvbmdcIj5EZWNlbWJlciAxMCwgMjAx
76
+ Njwvc3Ryb25nPi4gSG93ZXZlciwgd2UmI3gyMDE5O3ZlIGdvdCBhIGdyZWF0
77
+ IGFsdGVybmF0aXZlIGZvciB5b3U6IHRoZSA8YSBocmVmPVwiaHR0cHM6Ly9t
78
+ ZXJjdXJ5LnBvc3RsaWdodC5jb20vXCIgY2xhc3M9XCJtYXJrdXAtLWFuY2hv
79
+ ciBtYXJrdXAtLXAtYW5jaG9yXCI+PHN0cm9uZyBjbGFzcz1cIm1hcmt1cC0t
80
+ c3Ryb25nIG1hcmt1cC0tcC1zdHJvbmdcIj5NZXJjdXJ5IFRvb2xraXQ8L3N0
81
+ cm9uZz48L2E+LjwvcD48cCBpZD1cIjFkZWJcIiBjbGFzcz1cImdyYWYgZ3Jh
82
+ Zi0tcCBncmFmLWFmdGVyLS1wXCI+U2luY2UgaXQgbGF1bmNoZWQgYXMgYSBz
83
+ aW1wbGUgYm9va21hcmtsZXQgaW4gMjAwOSwgdGhlIFJlYWRhYmlsaXR5IHBy
84
+ b2plY3QmI3gyMDE5O3MgaW1wYWN0IG9uIHJlYWRpbmcgb24gdGhlIHdlYiBh
85
+ bmQgYmV5b25kIGlzIHVuZGVuaWFibGUuIFdlIGFwcHJlY2lhdGUgeW91ciBs
86
+ b3lhbHR5IGFuZCBzdXBwb3J0IGZvciB0aGUgcGxhdGZvcm0gb3ZlciB0aGUg
87
+ eWVhcnMuPC9wPjxwIGlkPVwiZDBiNlwiIGNsYXNzPVwiZ3JhZiBncmFmLS1w
88
+ IGdyYWYtYWZ0ZXItLXBcIj5XZSB3ZWxjb21lIHlvdXIgcXVlc3Rpb25zIGF0
89
+ IDxhIGhyZWY9XCJtYWlsdG86Y29udGFjdCUyQnJpbEByZWFkYWJpbGl0eS5j
90
+ b21cIiBjbGFzcz1cIm1hcmt1cC0tYW5jaG9yIG1hcmt1cC0tcC1hbmNob3Jc
91
+ Ij5jb250YWN0K3JpbEByZWFkYWJpbGl0eS5jb208L2E+LjwvcD48cCBpZD1c
92
+ ImMwMjRcIiBjbGFzcz1cImdyYWYgZ3JhZi0tcCBncmFmLWFmdGVyLS1wIGdy
93
+ YWYtLWxhc3RcIj48ZW0gY2xhc3M9XCJtYXJrdXAtLWVtIG1hcmt1cC0tcC1l
94
+ bVwiPlRoZSBSZWFkYWJpbGl0eSBUZWFtPC9lbT48L3A+PC9kaXY+PC9kaXY+
95
+ PC9kaXY+IiwiYXV0aG9yIjoiUmVhZGFiaWxpdHkiLCJkYXRlX3B1Ymxpc2hl
96
+ ZCI6IjIwMTYtMDktMDJUMTg6NDM6NTkuODY0WiIsImxlYWRfaW1hZ2VfdXJs
97
+ IjpudWxsLCJkZWsiOm51bGwsIm5leHRfcGFnZV91cmwiOm51bGwsInVybCI6
98
+ Imh0dHBzOi8vbWVkaXVtLmNvbS9AcmVhZGFiaWxpdHkvdGhlLXJlYWRhYmls
99
+ aXR5LWJvb2ttYXJraW5nLXNlcnZpY2Utd2lsbC1zaHV0LWRvd24tb24tc2Vw
100
+ dGVtYmVyLTMwLTIwMTYtMTY0MWNjMThlMDJiIiwiZG9tYWluIjoibWVkaXVt
101
+ LmNvbSIsImV4Y2VycHQiOiJBZnRlciBtb3JlIHRoYW4gZml2ZSB5ZWFycyBv
102
+ ZiBvcGVyYXRpb24sIHRoZSBSZWFkYWJpbGl0eSBhcnRpY2xlIGJvb2ttYXJr
103
+ aW5nL3JlYWQtaXQtbGF0ZXIgc2VydmljZSB3aWxsIGJlIHNodXR0aW5nIGRv
104
+ d24gYWZ0ZXIgU2VwdGVtYmVyIDMw4oCmIiwid29yZF9jb3VudCI6MTUxLCJk
105
+ aXJlY3Rpb24iOiJsdHIiLCJ0b3RhbF9wYWdlcyI6MSwicmVuZGVyZWRfcGFn
106
+ ZXMiOjF9
107
+ http_version:
108
+ recorded_at: Wed, 30 Nov 2016 02:59:26 GMT
109
+ recorded_with: VCR 3.0.3
@@ -0,0 +1,29 @@
1
+ require 'mercury_web_parser/configuration'
2
+ require 'mercury_web_parser/client'
3
+
4
# Top-level namespace of the gem.
#
# The module itself carries the global configuration (it extends
# Configuration) and forwards every message it does not understand to a
# freshly built MercuryWebParser::Client, so that
# `MercuryWebParser.parse(url)` works without creating a client by hand.
module MercuryWebParser
  extend Configuration

  class << self
    # Alias for MercuryWebParser::Client.new
    #
    # @param options [Hash] per-client overrides of the module configuration
    # @return [MercuryWebParser::Client]
    def new(options = {})
      MercuryWebParser::Client.new(options)
    end

    # Delegate to MercuryWebParser::Client
    #
    # One client is built per call and used for both the capability check
    # and the dispatch. Only messages the client publicly responds to are
    # forwarded; everything else falls through to the default NoMethodError.
    def method_missing(method, *args, &block)
      client = new
      return super unless client.respond_to?(method)
      client.public_send(method, *args, &block)
    end

    # Mirrors method_missing: the module responds to exactly the public
    # messages a client responds to. Defining this hook (instead of
    # overriding respond_to? itself) keeps `respond_to?` and `method` in
    # agreement with what can actually be dispatched.
    def respond_to_missing?(method_name, include_private = false)
      new.respond_to?(method_name) || super
    end
  end
end # MercuryWebParser
@@ -0,0 +1,16 @@
1
module MercuryWebParser
  module API
    # Content endpoint of the parser API. The including class must provide
    # `get(path, params)`, which is expected to return the decoded response
    # body.
    module Content
      # Parse a webpage and return its main content
      # Returns a MercuryWebParser::Article object
      #
      # @param url [String] The URL of an article to return the content for
      # @return [MercuryWebParser::Article]
      # @raise [MercuryWebParser::Error::NotParseable] when the decoded body
      #   is not a hash of article attributes (for example a JSON `null`)
      def parse(url)
        response = get('', url: url)

        # A `null` body can decode to nil without any parsing error; without
        # this guard it would silently turn into an empty Article.
        unless response.is_a?(Hash)
          raise MercuryWebParser::Error::NotParseable, 'Unparseable response'
        end

        MercuryWebParser::Article.new(response)
      end
    end # Content
  end # API
end
@@ -0,0 +1,12 @@
1
+ require 'ostruct'
2
+
3
module MercuryWebParser
  # An article as an OpenStruct: every key of the hash given to the
  # constructor becomes a reader (for example `article.title`).
  class Article < OpenStruct
    # Builds the article from a hash of attributes. The argument is
    # mandatory, unlike OpenStruct's own constructor.
    #
    # @param article [Hash] attribute names mapped to their values
    # @return [MercuryWebParser::Article]
    def initialize(article)
      super(article)
    end
  end # Article
end
@@ -0,0 +1,21 @@
1
+ require 'mercury_web_parser/connection'
2
+ require 'mercury_web_parser/request'
3
+ require 'mercury_web_parser/api/content'
4
+ require 'mercury_web_parser/article'
5
+
6
module MercuryWebParser
  # API client. Each instance keeps its own copy of the configuration, taken
  # from the module-level settings at construction time and overridable per
  # instance through the options hash or the generated accessors.
  class Client
    include MercuryWebParser::Connection
    include MercuryWebParser::Request
    include MercuryWebParser::API::Content

    attr_accessor(*Configuration::VALID_CONFIG_KEYS)

    # @param options [Hash] values for any of Configuration::VALID_CONFIG_KEYS;
    #   keys that are not given fall back to the module configuration
    def initialize(options = {})
      settings = MercuryWebParser.options.merge(options)

      # Assign every configuration key, not only the option keys: otherwise
      # the api_endpoint / user_agent accessors declared above stay nil and
      # per-client overrides for them are dropped.
      Configuration::VALID_CONFIG_KEYS.each do |key|
        public_send("#{key}=", settings[key])
      end
    end
  end # Client
end
@@ -0,0 +1,37 @@
1
+ require 'mercury_web_parser/version'
2
+
3
module MercuryWebParser
  # Configuration surface meant to be mixed in with `extend`: it adds one
  # accessor per configuration key and resets them to the defaults as soon
  # as it is extended.
  module Configuration
    VALID_CONNECTION_KEYS = [:api_endpoint, :user_agent].freeze
    VALID_OPTIONS_KEYS = [:api_token].freeze
    # Frozen like its two parts so the key list cannot be mutated at runtime.
    VALID_CONFIG_KEYS = (VALID_CONNECTION_KEYS + VALID_OPTIONS_KEYS).freeze

    DEFAULT_API_ENDPOINT = 'https://mercury.postlight.com/parser'.freeze
    DEFAULT_USER_AGENT = "MercuryWebParser Ruby Gem #{MercuryWebParser::VERSION}".freeze # rubocop:disable Metrics/LineLength
    DEFAULT_API_TOKEN = nil

    attr_accessor(*VALID_CONFIG_KEYS)

    # Hook run by `extend Configuration`: start from the default values.
    def self.extended(base)
      base.reset!
    end

    # Convenience method to allow configuration options to be set in a block
    #
    # @yieldparam config [Configuration] the object being configured
    def configure
      yield self
    end

    # Current value of every configuration key.
    #
    # Built from key/value pairs directly; flattening the pairs first would
    # break as soon as one of the values is itself an Array.
    #
    # @return [Hash{Symbol => Object}]
    def options
      Hash[VALID_CONFIG_KEYS.map { |key| [key, send(key)] }]
    end

    # Restore every configuration key to its default.
    #
    # @return [true]
    def reset!
      self.api_endpoint = DEFAULT_API_ENDPOINT
      self.user_agent = DEFAULT_USER_AGENT

      self.api_token = DEFAULT_API_TOKEN

      true
    end
  end # Configuration
end
@@ -0,0 +1,42 @@
1
+ require 'faraday'
2
+ require 'faraday_middleware'
3
+
4
module MercuryWebParser
  # Builds the Faraday connection used for API calls. The including class is
  # expected to expose `api_endpoint`, `user_agent` and `api_token` readers
  # (MercuryWebParser::Client does); any of them that is nil falls back to
  # the module-level configuration.
  module Connection
    # Instantiate a Faraday::Connection
    # @private
    private

    # Returns a Faraday::Connection object
    #
    # @param options [Hash] Faraday options merged over the default `:url`
    # @return [Faraday::Connection]
    def connection(options = {})
      defaults = { url: api_endpoint || MercuryWebParser.api_endpoint }

      get_connection(defaults.merge(options))
    end

    # Assemble the middleware stack and the authentication headers.
    #
    # The client's own settings are used for the headers so that several
    # clients with different tokens can coexist; reading the module-level
    # token here would send the global token for every client.
    #
    # @param options [Hash] options handed to Faraday.new
    # @return [Faraday::Connection]
    def get_connection(options)
      conn = Faraday.new(options) do |c|
        # encode request params as "www-form-urlencoded"
        c.use Faraday::Request::UrlEncoded

        c.use FaradayMiddleware::FollowRedirects, limit: 3

        # raise exceptions on 40x, 50x responses
        c.use Faraday::Response::RaiseError

        c.response :json, content_type: /\bjson$/

        c.adapter Faraday.default_adapter
      end

      conn.headers[:user_agent] = user_agent || MercuryWebParser.user_agent
      conn.headers[:"x-api-key"] = api_token || MercuryWebParser.api_token

      conn
    end
  end # Connection
end
@@ -0,0 +1,51 @@
1
module MercuryWebParser
  # Base class of every error raised by this gem.
  class Error < StandardError
    # Raised when Mercury returns a 4xx or 500 HTTP status code
    class ClientError < Error
      # Creates a new error from a failed HTTP exchange.
      #
      # When the wrapped error carries a response whose status is listed in
      # ERROR_MAP, the mapped, more specific error is raised from here
      # instead of completing construction; callers therefore rescue e.g.
      # UnauthorizedRequest rather than ClientError. Errors without a
      # response (or given as nil) produce a plain ClientError.
      #
      # @param error [#response, nil] the underlying HTTP error; its
      #   `response` is read as a hash with `:status` and `:body`
      # @return [MercuryWebParser::Error::ClientError]
      def initialize(error = nil)
        response = error.response if error.respond_to?(:response)

        if response
          mapped = ERROR_MAP[response[:status].to_i]
          raise mapped, error_message(response[:body]) if mapped
        end

        super
      end

      private

      # Message reported by the API, when the body is a decoded JSON object.
      # Any other body yields nil, which makes `raise` use the default
      # message of the error class.
      def error_message(body)
        body['message'] if body.is_a?(Hash)
      end
    end # ClientError

    class ConfigurationError < MercuryWebParser::Error; end

    # Raised when there's an error in Faraday
    class RequestError < MercuryWebParser::Error; end

    # Raised when MercuryWebParser returns a 400 HTTP status code
    class BadRequest < MercuryWebParser::Error; end

    # Raised when the response from the parser is null
    class NotParseable < MercuryWebParser::Error; end

    # Raised when MercuryWebParser returns a 401 HTTP status code
    class UnauthorizedRequest < MercuryWebParser::Error; end

    # Raised when MercuryWebParser returns a 403 HTTP status code
    class Forbidden < MercuryWebParser::Error; end

    # Raised when MercuryWebParser returns a 404 HTTP status code
    class NotFound < MercuryWebParser::Error; end

    # Raised when MercuryWebParser returns a 500 HTTP status code
    class InternalServerError < MercuryWebParser::Error; end

    # HTTP status code => error class raised for it.
    ERROR_MAP = {
      400 => MercuryWebParser::Error::BadRequest,
      401 => MercuryWebParser::Error::UnauthorizedRequest,
      403 => MercuryWebParser::Error::Forbidden,
      404 => MercuryWebParser::Error::NotFound,
      500 => MercuryWebParser::Error::InternalServerError
    }.freeze
  end # Error
end
@@ -0,0 +1,37 @@
1
+ require 'mercury_web_parser/error'
2
+
3
module MercuryWebParser
  # HTTP request helpers. The including class must provide `api_token` and
  # `connection`.
  module Request
    # Performs a HTTP Get request
    #
    # @param path [String] path relative to the connection URL
    # @param params [Hash] query parameters
    # @return [Object] the response body as returned by the connection
    def get(path, params = {})
      request(:get, path, params)
    end

    private

    # Checks the configuration, then performs the request.
    #
    # @return [Object] the response body
    # @raise [MercuryWebParser::Error::ConfigurationError] when no API token
    #   has been configured
    def request(method, path, params = {})
      if api_token.nil?
        raise MercuryWebParser::Error::ConfigurationError,
              'Please configure MercuryWebParser.api_token first'
      end

      make_request(method, path, params)
    end

    # Sends the request and translates Faraday failures into gem errors.
    #
    # `Faraday::ClientError` is referenced by its top-level name: the
    # `Faraday::Error::ClientError` alias is not available in every Faraday
    # release this gem's dependency range admits, and a missing constant in
    # a rescue clause would surface as a NameError instead of a gem error.
    # ParsingError is listed first because it can be a ClientError subclass.
    #
    # @return [Object] the response body
    def make_request(method, path, params)
      response = connection.send(method) do |request|
        request.url(path, params)
      end
      response.body
    rescue Faraday::ParsingError
      raise MercuryWebParser::Error::NotParseable, 'Unparseable response'
    rescue Faraday::ClientError => error
      raise MercuryWebParser::Error::ClientError, error
    rescue => error
      raise MercuryWebParser::Error::RequestError, error
    end
  end # Request
end
@@ -0,0 +1,3 @@
1
# Namespace of the mercury_web_parser gem.
module MercuryWebParser
  # Version of the gem; it is read by the gemspec and embedded in the
  # default User-Agent string.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/mercury_web_parser/version', __FILE__)
3
+
4
# Gem specification for mercury_web_parser.
Gem::Specification.new do |s|
  s.name     = 'mercury_web_parser'
  s.version  = MercuryWebParser::VERSION
  s.summary  = 'A simple Ruby wrapper for the Mercury Web Parser API'
  s.authors  = ['Jim Fiorato']
  s.email    = 'mercury_web_parser@theoldreader.com'
  # NOTE(review): the README links all point at `mercury_web_parser`
  # (underscores); the homepage is aligned with them -- confirm the
  # repository name before release.
  s.homepage = 'https://github.com/theoldreader/mercury_web_parser'
  s.license  = 'MIT'

  # Package exactly what git tracks; specs double as the gem's test files.
  s.files         = `git ls-files`.split("\n")
  s.require_paths = ['lib']
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")

  s.required_ruby_version = '>= 2.0'

  s.add_dependency 'faraday', '>= 0.9'
  s.add_dependency 'faraday_middleware', '>= 0.9'

  s.add_development_dependency 'danger'
  s.add_development_dependency 'danger-commit_lint'
  s.add_development_dependency 'rspec'
  s.add_development_dependency 'rubocop'
  s.add_development_dependency 'vcr'
end
@@ -0,0 +1,93 @@
1
+ require 'spec_helper'
2
+
3
describe MercuryWebParser::Client do
  # Every example starts from (and leaves behind) the default configuration.
  after do
    MercuryWebParser.reset!
  end

  context 'with module configuration' do
    before do
      MercuryWebParser.configure do |config|
        MercuryWebParser::Configuration::VALID_CONFIG_KEYS.each do |key|
          config.send("#{key}=", key)
        end
      end
    end

    it 'inherits the module configuration' do
      MercuryWebParser::Configuration::VALID_CONFIG_KEYS.each do |key|
        expect(MercuryWebParser.method(:"#{key}")).to be_a_kind_of(Method)
        expect(MercuryWebParser.send(:"#{key}")).to eq(key)
      end

      # The example is about the client: a new instance must pick up the
      # module-level option values.
      client = MercuryWebParser::Client.new
      MercuryWebParser::Configuration::VALID_OPTIONS_KEYS.each do |key|
        expect(client.send(:"#{key}")).to eq(key)
      end
    end
  end

  context 'with class configuration' do
    before do
      @configuration = {
        api_token: '1234'
      }
    end

    it 'overrides the module configuration' do
      client = MercuryWebParser::Client.new(@configuration)

      MercuryWebParser::Configuration::VALID_OPTIONS_KEYS.each do |key|
        expect(client.send(:"#{key}")).to eq(@configuration[key])
      end
    end

    it 'overrides the module configuration after initialization' do
      client = MercuryWebParser::Client.new

      @configuration.each do |key, value|
        client.send("#{key}=", value)
      end

      MercuryWebParser::Configuration::VALID_OPTIONS_KEYS.each do |key|
        expect(client.send(:"#{key}")).to eq(@configuration[key])
        # Changing one client must not leak into the module configuration.
        expect(MercuryWebParser.send(:"#{key}")).to be_nil
      end
    end
  end

  describe '#connection' do
    it 'looks like Faraday connection' do
      expect(subject.send(:connection)).to respond_to(:run_request)
    end
  end

  describe '#request' do
    before do
      MercuryWebParser.api_token = '1234'
    end

    it 'successfully parses a page' do
      VCR.use_cassette('successful') do
        url = 'https://medium.com/@readability/the-readability-bookmarking-service-will-shut-down-on-september-30-2016-1641cc18e02b'
        article = MercuryWebParser.parse(url)
        expect(article.title).to match(/The Readability bookmarking service/)
        expect(article.content).to match(/receive an email/)
        expect(article.author).to eq('Readability')
        expect(article.date_published).to eq('2016-09-02T18:43:59.864Z')
        expect(article.lead_image_url).to be_nil
        expect(article.dek).to be_nil
        expect(article.next_page_url).to be_nil
        expect(article.url).to eq(url)
        expect(article.domain).to eq('medium.com')
        expect(article.excerpt).to match(/more than five years/)
        expect(article.word_count).to eq(151)
        expect(article.direction).to eq('ltr')
        expect(article.total_pages).to eq(1)
        expect(article.rendered_pages).to eq(1)
      end
    end

    it 'catches Faraday connection errors' do
      VCR.use_cassette('connection_errors') do
        expect { MercuryWebParser.parse('http://www.google.com') }.to(
          raise_error(MercuryWebParser::Error::UnauthorizedRequest)
        )
      end
    end

    it 'catches Mercury Web Parser API errors' do
      VCR.use_cassette('parser_errors') do
        expect { MercuryWebParser.parse('http://abc.go.com/robots.txt') }.to(
          raise_error(MercuryWebParser::Error::NotParseable)
        )
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'spec_helper'
2
+
3
describe MercuryWebParser do
  # Restore the default configuration after each example.
  after { MercuryWebParser.reset! }

  describe '#new' do
    it 'is a MercuryWebParser::Client' do
      built = MercuryWebParser.new

      expect(built).to be_a_kind_of(MercuryWebParser::Client)
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../lib/mercury_web_parser')
2
+ require 'vcr'
3
+
4
# Recorded HTTP interactions are stored under fixtures/vcr_cassettes and
# replayed through Faraday.
VCR.configure do |config|
  config.cassette_library_dir = 'fixtures/vcr_cassettes'
  config.hook_into :faraday

  # Keep the configured API token out of newly recorded cassettes: it is
  # written as a placeholder and substituted back on playback.
  config.filter_sensitive_data('<MERCURY_API_TOKEN>') do
    MercuryWebParser.api_token
  end
end
metadata ADDED
@@ -0,0 +1,170 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mercury_web_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jim Fiorato
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-11-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: faraday
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0.9'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: faraday_middleware
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0.9'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0.9'
41
+ - !ruby/object:Gem::Dependency
42
+ name: danger
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: danger-commit_lint
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: vcr
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description:
112
+ email: mercury_web_parser@theoldreader.com
113
+ executables: []
114
+ extensions: []
115
+ extra_rdoc_files: []
116
+ files:
117
+ - ".gitignore"
118
+ - ".rspec"
119
+ - ".rubocop.yml"
120
+ - ".travis.yml"
121
+ - CHANGELOG.md
122
+ - Dangerfile
123
+ - Gemfile
124
+ - LICENSE
125
+ - README.md
126
+ - Rakefile
127
+ - fixtures/vcr_cassettes/connection_errors.yml
128
+ - fixtures/vcr_cassettes/parser_errors.yml
129
+ - fixtures/vcr_cassettes/successful.yml
130
+ - lib/mercury_web_parser.rb
131
+ - lib/mercury_web_parser/api/content.rb
132
+ - lib/mercury_web_parser/article.rb
133
+ - lib/mercury_web_parser/client.rb
134
+ - lib/mercury_web_parser/configuration.rb
135
+ - lib/mercury_web_parser/connection.rb
136
+ - lib/mercury_web_parser/error.rb
137
+ - lib/mercury_web_parser/request.rb
138
+ - lib/mercury_web_parser/version.rb
139
+ - mercury_web_parser.gemspec
140
+ - spec/mercury_web_parser/client_spec.rb
141
+ - spec/mercury_web_parser_spec.rb
142
+ - spec/spec_helper.rb
143
+ homepage: http://github.com/theoldreader/mercury-web-parser
144
+ licenses:
145
+ - MIT
146
+ metadata: {}
147
+ post_install_message:
148
+ rdoc_options: []
149
+ require_paths:
150
+ - lib
151
+ required_ruby_version: !ruby/object:Gem::Requirement
152
+ requirements:
153
+ - - ">="
154
+ - !ruby/object:Gem::Version
155
+ version: '2.0'
156
+ required_rubygems_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - ">="
159
+ - !ruby/object:Gem::Version
160
+ version: '0'
161
+ requirements: []
162
+ rubyforge_project:
163
+ rubygems_version: 2.4.8
164
+ signing_key:
165
+ specification_version: 4
166
+ summary: A simple Ruby wrapper for the Mercury Web Parser API
167
+ test_files:
168
+ - spec/mercury_web_parser/client_spec.rb
169
+ - spec/mercury_web_parser_spec.rb
170
+ - spec/spec_helper.rb