fly_parser 0.0.17 → 0.0.18
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/fly_parser/enable_source.rb +4 -0
- data/lib/fly_parser/sources/news-az.rb +40 -0
- data/lib/fly_parser/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a65beb075aac81c1cafe3e85048b0c82e1d24d0e
|
4
|
+
data.tar.gz: 3be9c2ceac25bef81188c33ea8c56dece79e53e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8c1a9722289e3024aa2528bbd7002beeaa7c37043a72c66f4d7acfae293dc6ae018354e74769317249651e73591d9f92db7fb28926de5524b3339b32c043b35b
|
7
|
+
data.tar.gz: 706443e426bd9bd381c2a1d08c938eb735d9e5e9762548ec9b6ec116959cb71a30e7e01437dd176008cc9582910fb1ef686d22481c8f006dcce4fa2dc2e4ce43
|
@@ -23,6 +23,10 @@ module Enable
|
|
23
23
|
lambda {|item| item["parser"] = Parser::NewsFr.new(item["url"], source: source)}
|
24
24
|
end
|
25
25
|
|
26
|
+
def news_az(source)
|
27
|
+
lambda {|item| item["parser"] = Parser::NewsAZ.new(item["url"], source: source)}
|
28
|
+
end
|
29
|
+
|
26
30
|
def method_missing(meth, *args)
|
27
31
|
prefix = "enable_"
|
28
32
|
meth = meth.to_s
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Parser
|
2
|
+
class NewsAZ < XmlBase
|
3
|
+
def initialize(source, options = {})
|
4
|
+
super
|
5
|
+
end
|
6
|
+
|
7
|
+
def parse_all
|
8
|
+
items = @source.search('//item')
|
9
|
+
# last_date = Time.now - 2.years # for dev 2 years
|
10
|
+
# select! or reject! is not exists for Nokogiri#NodeSet
|
11
|
+
# items = items.select {|item| item.xpath('pubDate').first.content() > last_date }
|
12
|
+
items.map do |item|
|
13
|
+
title = item.xpath('title/text()').text()
|
14
|
+
date = item.xpath('//pubdate').first.content
|
15
|
+
link = item.xpath('link/following-sibling::text()[1]').first
|
16
|
+
begin
|
17
|
+
page = Nokogiri::HTML(open(link))
|
18
|
+
rescue Exception => e
|
19
|
+
puts e.message
|
20
|
+
next
|
21
|
+
end
|
22
|
+
gallery_image = page.search('.gallery-photo img').first
|
23
|
+
single_image = page.search('.content-block .visual img').first
|
24
|
+
poster_image = single_image || gallery_image
|
25
|
+
next if poster_image.nil?
|
26
|
+
poster_image = poster_image.attributes['src'].value
|
27
|
+
full_desc = page.search('.content-block .text-block .text')
|
28
|
+
full_desc.search('a').remove()
|
29
|
+
full_desc.search("//text()[contains(., 'Автор')]").remove()
|
30
|
+
full_desc.search("//text()[contains(., 'Связаться')]").remove()
|
31
|
+
copyright = "<p>Источник: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
|
32
|
+
full_desc = full_desc.inner_html
|
33
|
+
full_desc.gsub!(/<!--noindex-->.*/,"")
|
34
|
+
full_desc.gsub!(/<!--\/noindex-->.*/,"")
|
35
|
+
content = full_desc + copyright
|
36
|
+
{title: title, content: content, poster_image: poster_image}
|
37
|
+
end.compact
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/fly_parser/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fly_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.17
|
4
|
+
version: 0.0.18
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ruslan Korolev
|
@@ -138,6 +138,7 @@ files:
|
|
138
138
|
- lib/fly_parser/sources/astrology.rb
|
139
139
|
- lib/fly_parser/sources/exercise.rb
|
140
140
|
- lib/fly_parser/sources/fitness.rb
|
141
|
+
- lib/fly_parser/sources/news-az.rb
|
141
142
|
- lib/fly_parser/sources/news-fr.rb
|
142
143
|
- lib/fly_parser/sources/news-nl.rb
|
143
144
|
- lib/fly_parser/sources/news.rb
|
@@ -164,7 +165,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
164
165
|
version: '0'
|
165
166
|
requirements: []
|
166
167
|
rubyforge_project:
|
167
|
-
rubygems_version: 2.4.
|
168
|
+
rubygems_version: 2.4.6
|
168
169
|
signing_key:
|
169
170
|
specification_version: 4
|
170
171
|
summary: Fly parser
|