medium-scrapper 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 159c9a1de5200ace4fd046cc2730f93d4a91e053
4
+ data.tar.gz: 1234666074cdde0a5bdbbc2a46eb285c809f4653
5
+ SHA512:
6
+ metadata.gz: 076802e6c3c210cf301a80e6e2b377fa7b10d17ad93184e45c1a242f3730483f79f37650adbe993b2d96e04b16eff5340103ece74732b07c9df051159cd523e6
7
+ data.tar.gz: 889135aede45a43ca0e821f057c14585226262420867c3f3e4a6de25ecde3540f21de74ca3dd2746ee18b5f8771e010a5975a23ce590f2add13c0c48e94075a9
@@ -0,0 +1,17 @@
1
+
2
+ require "model/user"
3
+
4
# Builds a User from a raw user payload.
class UserBuilder
  # @param raw_user [#[]] raw user data; #build reads the string keys
  #   "name", "username", "userId" and "bio" from it
  def initialize(raw_user)
    @raw_user = raw_user
  end

  # Reads the four user fields from the raw payload and hands them to
  # User.new in the order (name, username, user_id, bio).
  #
  # @return [User]
  def build
    User.new(
      @raw_user["name"],
      @raw_user["username"],
      @raw_user["userId"], # payload key is camelCase, the model uses user_id
      @raw_user["bio"]
    )
  end
end
@@ -0,0 +1,11 @@
1
+
2
+ require "retriever/user_retriever"
3
+ require "parser/url_parser"
4
+
5
# Entry point for loading Medium users through UserRetriever.
class MediumScrapper
  # @return [Object, nil] the value returned by the most recent #load_user
  #   call, or nil if #load_user has not been called yet
  attr_reader :user

  # Loads a user via UserRetriever (configured with UrlParser) and remembers
  # the result so it can be read back through #user.
  #
  # @param username [String] forwarded unchanged to UserRetriever#load
  # @return [Object] whatever UserRetriever#load returns
  def load_user(username)
    # Previously the result was returned but never stored, so the #user
    # reader could only ever return nil.
    @user = UserRetriever.new(UrlParser).load(username)
  end
end
data/lib/model/post.rb ADDED
@@ -0,0 +1,10 @@
1
+
2
# A single post: a title, a subtitle and its body content.
class Post
  # All three values given to the constructor are readable. Previously only
  # :title had a reader, which left the stored subtitle and content
  # unreachable from outside the object.
  attr_reader :title, :subtitle, :content

  # @param title [Object] stored as-is and exposed through #title
  # @param subtitle [Object] stored as-is and exposed through #subtitle
  # @param content [Object] stored as-is and exposed through #content
  def initialize(title, subtitle, content)
    @title = title
    @subtitle = subtitle
    @content = content
  end
end
data/lib/model/user.rb ADDED
@@ -0,0 +1,16 @@
1
+
2
# A user together with the posts that have been attached to it.
class User
  attr_reader :name, :username, :user_id, :bio, :posts

  # Stores the four given values as-is and starts with an empty post list.
  #
  # @param name [Object]
  # @param username [Object]
  # @param user_id [Object]
  # @param bio [Object]
  def initialize(name, username, user_id, bio)
    @name, @username, @user_id, @bio = name, username, user_id, bio
    @posts = []
  end

  # Appends a post to the end of #posts.
  #
  # @param post [Object] the post to attach
  # @return [Array] the post list, including the newly added post
  def add_post(post)
    @posts.push(post)
  end
end
@@ -0,0 +1,19 @@
1
+
2
+ require "open-uri"
3
+ require "json"
4
+
5
# Downloads a URL and returns the "payload" member of the JSON document found
# in the response body.
class UrlParser
  # @param url [String] the URL to download; it must be a URL that open-uri
  #   can read (http, https or ftp)
  def initialize(url)
    @url = url
  end

  # Downloads the URL, skips any non-JSON text in front of the first "{", and
  # parses the rest as JSON.
  #
  # The original implementation always dropped the first 16 characters.
  # NOTE(review): that presumably matches a fixed 16-character guard prefix
  # such as `])}while(1);</x>` in front of the JSON — confirm against a live
  # response. Searching for the first "{" gives the same result for such a
  # prefix and also works when the prefix is absent or has another length.
  #
  # @return [Object] the value stored under the top-level "payload" key
  # @raise [JSON::ParserError] if the body contains no "{" or is not valid JSON
  def parse
    body = fetch
    json_start = body.index("{")
    raise JSON::ParserError, "no JSON object found in response from #{@url}" if json_start.nil?

    JSON.parse(body[json_start..-1])["payload"]
  end

  private

  # Reads the whole response body.
  #
  # Goes through URI#open (provided by open-uri) instead of the bare
  # Kernel#open: Kernel#open no longer accepts URLs on Ruby 3.0+, and on older
  # versions it would also open local files or "|command" pipes if such a
  # string ever reached this class.
  #
  # @return [String] the response body
  def fetch
    URI.parse(@url).open(&:read)
  end
end
@@ -0,0 +1,26 @@
1
+
2
+ require "model/post"
3
+
4
# Fetches a single post through the injected parser class and turns it into
# a Post.
class PostRetriever
  # @param parser [Class] a class whose instances are created with a URL and
  #   respond to #parse (UrlParser in this gem)
  def initialize(parser)
    @parser = parser
  end

  # Downloads one post and builds a Post from its title, subtitle and
  # paragraphs.
  #
  # @param user_id [String] interpolated after the "@" in the post URL
  # @param post_id [String] interpolated as the last path segment of the URL
  # @return [Post]
  def load(user_id, post_id)
    post_raw = parse_url(user_id, post_id)
    content = post_raw["content"]

    Post.new(
      post_raw["title"],
      content["subtitle"],
      normalize_content(content["bodyModel"]["paragraphs"])
    )
  end

  # Joins the text of every paragraph except the first into one string, with a
  # blank line after each paragraph.
  #
  # NOTE(review): the first paragraph is skipped, presumably because it
  # repeats the post title — confirm against a real payload.
  #
  # An empty or nil paragraph list yields "" (it used to raise NoMethodError),
  # and a paragraph without "text" contributes an empty paragraph instead of
  # raising.
  #
  # @param paragraphs_raw [Array<Hash>, nil] paragraph hashes with a "text" key
  # @return [String]
  def normalize_content(paragraphs_raw)
    # "".dup keeps the accumulator mutable and in the source encoding.
    Array(paragraphs_raw).drop(1).each_with_object("".dup) do |paragraph, body|
      body << paragraph["text"].to_s << "\n\n"
    end
  end

  # Downloads the post document and returns its "value" member.
  #
  # @param user_id [String]
  # @param post_id [String]
  # @return [Object] the "value" entry of the parsed payload
  def parse_url(user_id, post_id)
    @parser.new("https://www.medium.com/@#{user_id}/#{post_id}?format=json").parse["value"]
  end
end
@@ -0,0 +1,28 @@
1
+
2
+ require "builder/user_builder"
3
+ require "retriever/post_retriever"
4
+
5
# Fetches a user profile through the injected parser class and builds a user
# with its latest posts attached.
class UserRetriever
  # @param parser [Class] a class whose instances are created with a URL and
  #   respond to #parse (UrlParser in this gem); it is also handed on to
  #   PostRetriever
  def initialize(parser)
    @parser = parser
  end

  # Downloads the profile, builds the user from its "value" entry and loads
  # every post listed under "latestPosts".
  #
  # @param username [String] interpolated after the "@" in the profile URL
  # @return [Object] the user built by UserBuilder, with one post added per
  #   "latestPosts" entry (none when that key is missing)
  def load(username)
    profile = parse_url(username)
    user = UserBuilder.new(profile["value"]).build

    # One retriever serves every post; it only holds the parser class.
    post_retriever = PostRetriever.new(@parser)

    # A profile without "latestPosts" used to raise NoMethodError on nil;
    # treat it as a user with no posts instead.
    (profile["latestPosts"] || []).each do |post_raw|
      user.add_post(post_retriever.load(user.user_id, post_raw["id"]))
    end

    user
  end

  private

  # Downloads and parses the profile document for the given username.
  #
  # @param username [String]
  # @return [Object] whatever the parser's #parse returns
  def parse_url(username)
    @parser.new("https://www.medium.com/@#{username}?format=json").parse
  end
end
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: medium-scrapper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - uesteibar
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-06-21 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Ruby gem that provides a Medium.com read-only API
14
+ email: uesteibar@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/builder/user_builder.rb
20
+ - lib/medium-scrapper.rb
21
+ - lib/model/post.rb
22
+ - lib/model/user.rb
23
+ - lib/parser/url_parser.rb
24
+ - lib/retriever/post_retriever.rb
25
+ - lib/retriever/user_retriever.rb
26
+ homepage: https://github.com/uesteibar/medium-gem
27
+ licenses:
28
+ - MIT
29
+ metadata: {}
30
+ post_install_message:
31
+ rdoc_options: []
32
+ require_paths:
33
+ - lib
34
+ required_ruby_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ requirements: []
45
+ rubyforge_project:
46
+ rubygems_version: 2.4.6
47
+ signing_key:
48
+ specification_version: 4
49
+ summary: medium read-only api
50
+ test_files: []
51
+ has_rdoc: