fly_parser 0.0.17 → 0.0.18

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1def1d078ca769525262a4f539e27fcc321ee7ac
4
- data.tar.gz: 9a0c3d40d42ac7e6473d09d1bc2c4d5151703c07
3
+ metadata.gz: a65beb075aac81c1cafe3e85048b0c82e1d24d0e
4
+ data.tar.gz: 3be9c2ceac25bef81188c33ea8c56dece79e53e7
5
5
  SHA512:
6
- metadata.gz: b7bd40da36d925d8babd5fee1598cfcaa37a1b68d24dad63c59fd1292d752a3c6574707bf49806589890ed141fede34168fb8c7abe64d1df2d51fd8affcf61be
7
- data.tar.gz: 4a3b7476ae9ccae79a75a0ac29495536f4269e7c8ad9b5e1335eba1b6011709fa707558ddf875bc62fd659d964dd37133a4a921a5187f8a06cd1f855a5555643
6
+ metadata.gz: 8c1a9722289e3024aa2528bbd7002beeaa7c37043a72c66f4d7acfae293dc6ae018354e74769317249651e73591d9f92db7fb28926de5524b3339b32c043b35b
7
+ data.tar.gz: 706443e426bd9bd381c2a1d08c938eb735d9e5e9762548ec9b6ec116959cb71a30e7e01437dd176008cc9582910fb1ef686d22481c8f006dcce4fa2dc2e4ce43
@@ -23,6 +23,10 @@ module Enable
23
23
  lambda {|item| item["parser"] = Parser::NewsFr.new(item["url"], source: source)}
24
24
  end
25
25
 
26
# Returns the callback used to enable the NewsAZ parser for a feed item.
#
# The callback receives an item (anything supporting [] and []=), creates a
# Parser::NewsAZ for item["url"] with the given source, stores it under
# item["parser"] and returns it.
def news_az(source)
  lambda do |item|
    parser = Parser::NewsAZ.new(item["url"], source: source)
    item["parser"] = parser
  end
end
29
+
26
30
  def method_missing(meth, *args)
27
31
  prefix = "enable_"
28
32
  meth = meth.to_s
@@ -0,0 +1,40 @@
1
module Parser
  # Parser for the "news-az" source. Walks the <item> entries of the feed held
  # in @source, downloads the article page each item links to and extracts a
  # title, a poster image URL and a cleaned-up HTML body.
  class NewsAZ < XmlBase
    # Forwards both arguments unchanged to XmlBase#initialize.
    def initialize(source, options = {})
      super
    end

    # Builds one hash per feed item whose article page could be fetched and
    # contains a usable poster image.
    #
    # @return [Array<Hash>] hashes with the keys :title, :content and
    #   :poster_image; items that fail to download or have no image are omitted
    def parse_all
      items = @source.search('//item')
      # Date filtering is currently disabled. Kept for reference (original
      # author's note: Nokogiri's NodeSet has no select!/reject!):
      # last_date = Time.now - 2.years # for dev 2 years
      # items = items.select {|item| item.xpath('pubDate').first.content() > last_date }
      items.map { |item| build_entry(item) }.compact
    end

    private

    # Turns a single feed item into a result hash, or nil when it is skipped.
    def build_entry(item)
      title = item.xpath('title/text()').text
      # The URL is taken from the text node that follows <link>, not from the
      # element's own content.
      link = item.xpath('link/following-sibling::text()[1]').first

      page = fetch_article_page(link)
      return if page.nil?

      poster_image = poster_image_url(page)
      return if poster_image.nil?

      { title: title, content: article_content(page), poster_image: poster_image }
    end

    # Downloads and parses the linked article page. Returns nil when the item
    # has no link; on a failed request the error message is printed and nil is
    # returned so the remaining items are still processed.
    def fetch_article_page(link)
      url = link && link.content.strip
      return if url.nil? || url.empty?

      # URI#open (from open-uri) is used instead of Kernel#open so that text
      # coming from the feed can never be interpreted as a "|command" pipe or
      # a local file path. The block form closes the handle after parsing.
      URI.parse(url).open { |io| Nokogiri::HTML(io) }
    rescue StandardError => e
      # StandardError rather than Exception: interrupts and exit requests must
      # not be swallowed here.
      puts e.message
      nil
    end

    # Picks the article's single image, falling back to the first gallery
    # photo. Returns its src attribute, or nil when there is no image or the
    # image has no src.
    def poster_image_url(page)
      image = page.search('.content-block .visual img').first ||
              page.search('.gallery-photo img').first
      image && image['src']
    end

    # Returns the article text as HTML with links, author/contact lines and
    # the remainder of any line holding a noindex marker removed, followed by
    # the source attribution paragraph.
    def article_content(page)
      full_desc = page.search('.content-block .text-block .text')
      full_desc.search('a').remove
      # ".//" keeps the clean-up inside the article text; a leading "//" would
      # match (and remove) text nodes anywhere in the page.
      full_desc.search(".//text()[contains(., 'Автор')]").remove
      full_desc.search(".//text()[contains(., 'Связаться')]").remove
      copyright = "<p>Источник: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
      html = full_desc.inner_html
      # "." does not match newlines, so only the rest of the marker's own line
      # is dropped.
      html = html.gsub(/<!--noindex-->.*/, "").gsub(/<!--\/noindex-->.*/, "")
      html + copyright
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Parser
2
- VERSION = "0.0.17"
2
+ VERSION = "0.0.18"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fly_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.17
4
+ version: 0.0.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ruslan Korolev
@@ -138,6 +138,7 @@ files:
138
138
  - lib/fly_parser/sources/astrology.rb
139
139
  - lib/fly_parser/sources/exercise.rb
140
140
  - lib/fly_parser/sources/fitness.rb
141
+ - lib/fly_parser/sources/news-az.rb
141
142
  - lib/fly_parser/sources/news-fr.rb
142
143
  - lib/fly_parser/sources/news-nl.rb
143
144
  - lib/fly_parser/sources/news.rb
@@ -164,7 +165,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
164
165
  version: '0'
165
166
  requirements: []
166
167
  rubyforge_project:
167
- rubygems_version: 2.4.4
168
+ rubygems_version: 2.4.6
168
169
  signing_key:
169
170
  specification_version: 4
170
171
  summary: Fly parser