telegraph-parser 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c27c84d13c2d7725fbb32104dc5a42a92998ceb0
4
- data.tar.gz: f42a5d5cc5a7c1135d615df81de818cbec4e85ea
3
+ metadata.gz: 26cc01f88e7399f22fa4289fbb7d48dd9abb1e42
4
+ data.tar.gz: 3396ed38a2844d8daf0512107246e2f9cb07fa37
5
5
  SHA512:
6
- metadata.gz: 656239f075164f8d6dd4a241cc000d166ba1c9ad60071e905e979d2c1f9bc1914badf8d314146af878ba7d3f7a9a3ae8ebc29268a0d3301b5207dc2c8399f4e4
7
- data.tar.gz: d62631df9d0482117e006283bb43adb40743dfccff430e5b7e8623f3a9dd4894f50b53685b1c8d8a1ca6822c255458333c4d59e58d232c28807cbf466615d06e
6
+ metadata.gz: 9275b8c5a759ab28c47dc4d0da3968ba965c9a5bcb8684dbbb7bac626013b348b6649597b57373958d5d9d3a52a59c4f62e1af5b6f2b19455cd16e6b18e0ebe0
7
+ data.tar.gz: 51ba6f30a59614dcd26670c101bd059b4a6f6655c985fc7b0eaa261c2350be350a5259caa75a29dcfd575d6e9fc7b53c2e272fcce7ede226a946f606486a94a8
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.3
4
+ before_install: gem install bundler -v 1.11.2
@@ -1,21 +1,21 @@
1
1
  module Telegraph
2
2
  module Parser
3
3
  class Article
4
- attr_accessor :id, :title, :author, :content, :images
4
+ ATTRIBUTES = %i[id title author content images].freeze
5
+
6
+ attr_reader *ATTRIBUTES
5
7
 
6
8
  def initialize(attributes)
7
9
  attributes.each do |attr, val|
8
- next unless respond_to?("#{attr}=")
9
- send("#{attr}=", val)
10
+ next unless ATTRIBUTES.include?(attr.to_sym)
11
+ instance_variable_set(:"@#{attr}", val)
10
12
  end
11
13
  end
12
14
 
13
- def self.load(article_id, image_prefix: '')
14
- binding = Binding.new(article_id)
15
-
16
- Parser.new(binding, image_prefix).article.tap do |article|
17
- article.id = article_id
18
- end
15
+ def self.find(article_id, image_prefix: '')
16
+ parser = Parser.new(article_id, image_prefix)
17
+ parser.fetch_and_parse!
18
+ Article.new(parser.parsed_data)
19
19
  end
20
20
  end
21
21
  end
@@ -0,0 +1,5 @@
1
+ module Telegraph
2
+ module Parser
3
+ class ArticleNotFound < StandardError; end
4
+ end
5
+ end
@@ -0,0 +1,23 @@
1
+ require 'open-uri'
2
+
3
+ module Telegraph
4
+ module Parser
5
+ class Fetcher
6
+ HOST = 'telegra.ph'.freeze
7
+
8
+ def fetch(article_id)
9
+ Net::HTTP.get(HOST, "/#{article_id}")
10
+ rescue EOFError
11
+ raise Telegraph::Parser::ArticleNotFound
12
+ end
13
+
14
+ def fetch_image(src, prefix)
15
+ { image_id(src, prefix) => open(src).read }
16
+ end
17
+
18
+ def image_id(src, prefix)
19
+ "#{prefix}/#{File.basename(URI.parse(src).path)}"
20
+ end
21
+ end
22
+ end
23
+ end
@@ -3,48 +3,56 @@ require 'nokogiri'
3
3
  module Telegraph
4
4
  module Parser
5
5
  class Parser
6
- attr_accessor :bindings, :page, :article, :image_prefix
6
+ attr_reader :parsed_data
7
7
 
8
- def initialize(binding, image_prefix)
9
- self.bindings = binding
10
- self.image_prefix = image_prefix
11
- parse_html
8
+ def initialize(article_id, image_prefix)
9
+ @fetcher = Fetcher.new
10
+ @article_id = article_id
11
+ @image_prefix = image_prefix
12
+ end
13
+
14
+ def fetch_and_parse!
15
+ load_page!
16
+ load_images!
17
+ compile_parsed_data!
12
18
  end
13
19
 
14
20
  private
15
21
 
16
- def page
17
- @page ||= Nokogiri::HTML(bindings.content).css('article')
22
+ def load_page!
23
+ @page = Nokogiri::HTML(@fetcher.fetch(@article_id)).css('article')
18
24
  end
19
25
 
20
- def images
21
- {}.tap do |images|
22
- page.search('.//img').each do |image|
23
- images.merge!(bindings.image(image.attributes['src'].value,
24
- image_prefix))
25
- end
26
- end
26
+ def compile_parsed_data!
27
+ @parsed_data = {
28
+ article_id: @article_id,
29
+ title: tag_content(:h1),
30
+ author: tag_content(:address),
31
+ content: without_tags,
32
+ images: @images
33
+ }
27
34
  end
28
35
 
29
- def parse_html
30
- images_hash = images
31
- self.article = Article.new(title: tag_content(:h1),
32
- author: tag_content(:address),
33
- content: without_tags,
34
- images: images_hash)
36
+ def load_images!
37
+ @images = {}
38
+
39
+ @page.search('.//img').each do |image|
40
+ i = @fetcher.fetch_image(image.attributes['src'].value, @image_prefix)
41
+ @images.merge!(i)
42
+ end
35
43
  end
36
44
 
37
45
  def tag_content(tag)
38
- page.search(".//#{tag}").text
46
+ @page.search(".//#{tag}").text
39
47
  end
40
48
 
41
49
  def without_tags
42
- page_copy = page.dup
43
- %i(h1 address).each { |tag| page.search(".//#{tag}").remove }
50
+ page_copy = @page.dup
51
+ %i(h1 address).each { |tag| @page.search(".//#{tag}").remove }
44
52
 
45
53
  page_copy.search('.//img').each do |image|
46
54
  image.attributes['src'].value =
47
- bindings.image_id(image.attributes['src'].value, image_prefix)
55
+ @fetcher.image_id(image.attributes['src'].value, @image_prefix)
48
56
  end
49
57
 
50
58
  page_copy.remove_attr('id').remove_class.to_s
@@ -1,5 +1,5 @@
1
1
  module Telegraph
2
2
  module Parser
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
@@ -1,5 +1,6 @@
1
1
  require "telegraph/parser/version"
2
- require "telegraph/parser/binding"
2
+ require "telegraph/parser/errors"
3
+ require "telegraph/parser/fetcher"
3
4
  require "telegraph/parser/parser"
4
5
  require "telegraph/parser/article"
5
6
 
@@ -22,4 +22,5 @@ Gem::Specification.new do |spec|
22
22
  spec.add_development_dependency "rake", "~> 10.0"
23
23
  spec.add_development_dependency "rspec", "~> 3.0"
24
24
  spec.add_development_dependency "nokogiri", "~> 1.6.8"
25
+ spec.add_development_dependency "pry"
25
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: telegraph-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sergey Vernidub
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-12-11 00:00:00.000000000 Z
11
+ date: 2018-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: 1.6.8
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  description:
70
84
  email:
71
85
  - svernidub@gmail.com
@@ -74,6 +88,8 @@ extensions: []
74
88
  extra_rdoc_files: []
75
89
  files:
76
90
  - ".gitignore"
91
+ - ".rspec"
92
+ - ".travis.yml"
77
93
  - Gemfile
78
94
  - LICENSE.txt
79
95
  - README.md
@@ -82,7 +98,8 @@ files:
82
98
  - bin/setup
83
99
  - lib/telegraph/parser.rb
84
100
  - lib/telegraph/parser/article.rb
85
- - lib/telegraph/parser/binding.rb
101
+ - lib/telegraph/parser/errors.rb
102
+ - lib/telegraph/parser/fetcher.rb
86
103
  - lib/telegraph/parser/parser.rb
87
104
  - lib/telegraph/parser/version.rb
88
105
  - telegraph-parser.gemspec
@@ -1,31 +0,0 @@
1
- require 'open-uri'
2
-
3
- module Telegraph
4
- module Parser
5
- class Binding
6
- BASE_URI = 'telegra.ph'
7
-
8
- attr_accessor :article_id
9
-
10
- def initialize(article_id)
11
- self.article_id = article_id
12
- end
13
-
14
- def content
15
- Net::HTTP.get(BASE_URI, "/#{article_id}")
16
- rescue EOFError
17
- ""
18
- end
19
-
20
- def image(src, prefix)
21
- src.gsub!('http://telegra.ph', '')
22
- { image_id(src, prefix) => open("http://#{BASE_URI}#{src}").read }
23
- end
24
-
25
- def image_id(src, prefix)
26
- src.gsub!('http://telegra.ph', '')
27
- "#{prefix}/#{File.basename(URI.parse("#{BASE_URI}#{src}").path)}"
28
- end
29
- end
30
- end
31
- end