telegraph-parser 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c27c84d13c2d7725fbb32104dc5a42a92998ceb0
4
- data.tar.gz: f42a5d5cc5a7c1135d615df81de818cbec4e85ea
3
+ metadata.gz: 26cc01f88e7399f22fa4289fbb7d48dd9abb1e42
4
+ data.tar.gz: 3396ed38a2844d8daf0512107246e2f9cb07fa37
5
5
  SHA512:
6
- metadata.gz: 656239f075164f8d6dd4a241cc000d166ba1c9ad60071e905e979d2c1f9bc1914badf8d314146af878ba7d3f7a9a3ae8ebc29268a0d3301b5207dc2c8399f4e4
7
- data.tar.gz: d62631df9d0482117e006283bb43adb40743dfccff430e5b7e8623f3a9dd4894f50b53685b1c8d8a1ca6822c255458333c4d59e58d232c28807cbf466615d06e
6
+ metadata.gz: 9275b8c5a759ab28c47dc4d0da3968ba965c9a5bcb8684dbbb7bac626013b348b6649597b57373958d5d9d3a52a59c4f62e1af5b6f2b19455cd16e6b18e0ebe0
7
+ data.tar.gz: 51ba6f30a59614dcd26670c101bd059b4a6f6655c985fc7b0eaa261c2350be350a5259caa75a29dcfd575d6e9fc7b53c2e272fcce7ede226a946f606486a94a8
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.3
4
+ before_install: gem install bundler -v 1.11.2
@@ -1,21 +1,21 @@
1
1
  module Telegraph
2
2
  module Parser
3
3
  class Article
4
- attr_accessor :id, :title, :author, :content, :images
4
+ ATTRIBUTES = %i[id title author content images].freeze
5
+
6
+ attr_reader *ATTRIBUTES
5
7
 
6
8
  def initialize(attributes)
7
9
  attributes.each do |attr, val|
8
- next unless respond_to?("#{attr}=")
9
- send("#{attr}=", val)
10
+ next unless ATTRIBUTES.include?(attr.to_sym)
11
+ instance_variable_set(:"@#{attr}", val)
10
12
  end
11
13
  end
12
14
 
13
- def self.load(article_id, image_prefix: '')
14
- binding = Binding.new(article_id)
15
-
16
- Parser.new(binding, image_prefix).article.tap do |article|
17
- article.id = article_id
18
- end
15
+ def self.find(article_id, image_prefix: '')
16
+ parser = Parser.new(article_id, image_prefix)
17
+ parser.fetch_and_parse!
18
+ Article.new(parser.parsed_data)
19
19
  end
20
20
  end
21
21
  end
@@ -0,0 +1,5 @@
1
+ module Telegraph
2
+ module Parser
3
+ class ArticleNotFound < StandardError; end
4
+ end
5
+ end
@@ -0,0 +1,23 @@
1
+ require 'open-uri'
2
+
3
+ module Telegraph
4
+ module Parser
5
+ class Fetcher
6
+ HOST = 'telegra.ph'.freeze
7
+
8
+ def fetch(article_id)
9
+ Net::HTTP.get(HOST, "/#{article_id}")
10
+ rescue EOFError
11
+ raise Telegraph::Parser::ArticleNotFound
12
+ end
13
+
14
+ def fetch_image(src, prefix)
15
+ { image_id(src, prefix) => open(src).read }
16
+ end
17
+
18
+ def image_id(src, prefix)
19
+ "#{prefix}/#{File.basename(URI.parse(src).path)}"
20
+ end
21
+ end
22
+ end
23
+ end
@@ -3,48 +3,56 @@ require 'nokogiri'
3
3
  module Telegraph
4
4
  module Parser
5
5
  class Parser
6
- attr_accessor :bindings, :page, :article, :image_prefix
6
+ attr_reader :parsed_data
7
7
 
8
- def initialize(binding, image_prefix)
9
- self.bindings = binding
10
- self.image_prefix = image_prefix
11
- parse_html
8
+ def initialize(article_id, image_prefix)
9
+ @fetcher = Fetcher.new
10
+ @article_id = article_id
11
+ @image_prefix = image_prefix
12
+ end
13
+
14
+ def fetch_and_parse!
15
+ load_page!
16
+ load_images!
17
+ compile_parsed_data!
12
18
  end
13
19
 
14
20
  private
15
21
 
16
- def page
17
- @page ||= Nokogiri::HTML(bindings.content).css('article')
22
+ def load_page!
23
+ @page = Nokogiri::HTML(@fetcher.fetch(@article_id)).css('article')
18
24
  end
19
25
 
20
- def images
21
- {}.tap do |images|
22
- page.search('.//img').each do |image|
23
- images.merge!(bindings.image(image.attributes['src'].value,
24
- image_prefix))
25
- end
26
- end
26
+ def compile_parsed_data!
27
+ @parsed_data = {
28
+ article_id: @article_id,
29
+ title: tag_content(:h1),
30
+ author: tag_content(:address),
31
+ content: without_tags,
32
+ images: @images
33
+ }
27
34
  end
28
35
 
29
- def parse_html
30
- images_hash = images
31
- self.article = Article.new(title: tag_content(:h1),
32
- author: tag_content(:address),
33
- content: without_tags,
34
- images: images_hash)
36
+ def load_images!
37
+ @images = {}
38
+
39
+ @page.search('.//img').each do |image|
40
+ i = @fetcher.fetch_image(image.attributes['src'].value, @image_prefix)
41
+ @images.merge!(i)
42
+ end
35
43
  end
36
44
 
37
45
  def tag_content(tag)
38
- page.search(".//#{tag}").text
46
+ @page.search(".//#{tag}").text
39
47
  end
40
48
 
41
49
  def without_tags
42
- page_copy = page.dup
43
- %i(h1 address).each { |tag| page.search(".//#{tag}").remove }
50
+ page_copy = @page.dup
51
+ %i(h1 address).each { |tag| @page.search(".//#{tag}").remove }
44
52
 
45
53
  page_copy.search('.//img').each do |image|
46
54
  image.attributes['src'].value =
47
- bindings.image_id(image.attributes['src'].value, image_prefix)
55
+ @fetcher.image_id(image.attributes['src'].value, @image_prefix)
48
56
  end
49
57
 
50
58
  page_copy.remove_attr('id').remove_class.to_s
@@ -1,5 +1,5 @@
1
1
  module Telegraph
2
2
  module Parser
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
@@ -1,5 +1,6 @@
1
1
  require "telegraph/parser/version"
2
- require "telegraph/parser/binding"
2
+ require "telegraph/parser/errors"
3
+ require "telegraph/parser/fetcher"
3
4
  require "telegraph/parser/parser"
4
5
  require "telegraph/parser/article"
5
6
 
@@ -22,4 +22,5 @@ Gem::Specification.new do |spec|
22
22
  spec.add_development_dependency "rake", "~> 10.0"
23
23
  spec.add_development_dependency "rspec", "~> 3.0"
24
24
  spec.add_development_dependency "nokogiri", "~> 1.6.8"
25
+ spec.add_development_dependency "pry"
25
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: telegraph-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sergey Vernidub
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-12-11 00:00:00.000000000 Z
11
+ date: 2018-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: 1.6.8
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  description:
70
84
  email:
71
85
  - svernidub@gmail.com
@@ -74,6 +88,8 @@ extensions: []
74
88
  extra_rdoc_files: []
75
89
  files:
76
90
  - ".gitignore"
91
+ - ".rspec"
92
+ - ".travis.yml"
77
93
  - Gemfile
78
94
  - LICENSE.txt
79
95
  - README.md
@@ -82,7 +98,8 @@ files:
82
98
  - bin/setup
83
99
  - lib/telegraph/parser.rb
84
100
  - lib/telegraph/parser/article.rb
85
- - lib/telegraph/parser/binding.rb
101
+ - lib/telegraph/parser/errors.rb
102
+ - lib/telegraph/parser/fetcher.rb
86
103
  - lib/telegraph/parser/parser.rb
87
104
  - lib/telegraph/parser/version.rb
88
105
  - telegraph-parser.gemspec
@@ -1,31 +0,0 @@
1
- require 'open-uri'
2
-
3
- module Telegraph
4
- module Parser
5
- class Binding
6
- BASE_URI = 'telegra.ph'
7
-
8
- attr_accessor :article_id
9
-
10
- def initialize(article_id)
11
- self.article_id = article_id
12
- end
13
-
14
- def content
15
- Net::HTTP.get(BASE_URI, "/#{article_id}")
16
- rescue EOFError
17
- ""
18
- end
19
-
20
- def image(src, prefix)
21
- src.gsub!('http://telegra.ph', '')
22
- { image_id(src, prefix) => open("http://#{BASE_URI}#{src}").read }
23
- end
24
-
25
- def image_id(src, prefix)
26
- src.gsub!('http://telegra.ph', '')
27
- "#{prefix}/#{File.basename(URI.parse("#{BASE_URI}#{src}").path)}"
28
- end
29
- end
30
- end
31
- end