RubyGems - fly_parser - Versions diffs - 0.0.5 → 0.0.6 - Mend

fly_parser 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml +4 -4
data/lib/fly_parser/sources/news-nl.rb +30 -0
data/lib/fly_parser/sources/news.rb +2 -23
data/lib/fly_parser/xml_base.rb +28 -0
data/lib/fly_parser.rb +8 -7
metadata +3 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 220a36cf7e2845300835a2f2aa9c931e01245402
-  data.tar.gz: 7bbcbb1959f8d78d690223265ce95b0418f5116e
+  metadata.gz: a4b60890098babea5cdf0a19b2ab7f72207bce93
+  data.tar.gz: a5cf7fdba982c30f622f2c048ea5ef08d6a4565a
 SHA512:
-  metadata.gz: 3e33cb8372a87e60b830c91343bbcd05705300644c073f5094adf8017686f8d59a874e2a10d8672795c07823c5eeeb1695c184c52ac8fe2e99081eb21a8f74d1
-  data.tar.gz: f398560cf2541d717de50ba5158996a0bb5bbb53b19fc443ee2d0283b41417cb873200acb4fd83bbf2e71847f6535bb12eb225dee86129fb18d2a01ad3a8107f
+  metadata.gz: 4d9bfcfbc33a83acbf053cca55206f27377a1f6ab62ead285def08fc43ef659ea61aee533c5c743b21ee06b8d7c22e79b77942370ce76ace69270e14f359f5fd
+  data.tar.gz: f4b199bf649c121cbf1883e00acf451742dd25830076bc3b55a62258e149d1c314026e447580a3c8e2eab3d42b6648e5b9adeb763056aa3bec10322c62d6c288

data/lib/fly_parser/sources/news-nl.rb ADDED Viewed

@@ -0,0 +1,30 @@
+module Parser
+  class NewsNl < XmlBase
+    def initialize(source, options = {})
+      super
+    end
+    def parse_all
+      items = @source.search('//item')
+      # last_date = Time.now - 2.years # for dev 2 years
+      # select! and reject! do not exist for Nokogiri#NodeSet
+      # items = items.select {|item| item.xpath('pubDate').first.content() > last_date }
+      items.map do |item|
+        title = item.xpath('title/text()').text()
+        date = item.xpath('pubDate').first.content()
+        link = item.xpath('link/text()').text()
+        page = Nokogiri::HTML(open(link))
+        next if page.search('#article-image a img').first.nil?
+        poster_image = page.search('#article-image a img').first.attributes['src'].value
+        full_desc = item.xpath('description/text()')
+        # strip the href attribute from anchors in the description, keeping only the link text
+        full_desc = full_desc.text().gsub(/<a href="([a-zA-Z:\/\.\d\-]*)">(.*)<\/a>/,'<a>\2</a>')
+        copyright = "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
+        content = full_desc + copyright
+        {title: title, content: content, poster_image: poster_image}
+      end.compact
+    end
+  end
+end

data/lib/fly_parser/sources/news.rb CHANGED Viewed

@@ -1,28 +1,7 @@
 module Parser
-  class News
+  class News < XmlBase
     def initialize(source, options = {})
-      if options[:type] == :file
-        source = fake_url(source)
-      end
-      @copyright = copyright(options)
-      @source = Parser.connect(source)
-      @delay ||= 10
-    end
-    def fake_url(source)
-      stream = File.read(source)
-      # test_file.com is a random url, just for Mechanize parsing
-      url = "http://www.google.com"
-      FakeWeb.register_uri(:get, url, :body => stream, :content_type => "application/xml")
-      url
-    end
-    def copyright(options)
-      source = options[:source]
-      {
-        url: source['copyright'],
-        title: source['copyright_title']
-      }
+      super
     end
     def parse_all

data/lib/fly_parser/xml_base.rb ADDED Viewed

@@ -0,0 +1,28 @@
+module Parser
+  class XmlBase
+    def initialize(source, options = {})
+      if options[:type] == :file
+        source = fake_url(source)
+      end
+      @copyright = copyright(options)
+      @source = Parser.connect(source)
+      @delay ||= 10
+    end
+    def fake_url(source)
+      stream = File.read(source)
+      # the URL below is an arbitrary placeholder, registered with FakeWeb just for Mechanize parsing
+      url = "http://www.google.com"
+      FakeWeb.register_uri(:get, url, :body => stream, :content_type => "application/xml")
+      url
+    end
+    def copyright(options)
+      source = options[:source]
+      {
+        url: source['copyright'],
+        title: source['copyright_title']
+      }
+    end
+  end
+end

data/lib/fly_parser.rb CHANGED Viewed

@@ -5,9 +5,8 @@ require 'pry'
 require 'open-uri'
 require 'yaml'
 require 'mechanize'
-BASE_PATH = File.expand_path("fly_parser/base", File.dirname(__FILE__))
+BASE_PATH = File.expand_path("fly_parser/*.rb", File.dirname(__FILE__))
 LOGO_PATH = File.expand_path("fly_parser/logo.txt", File.dirname(__FILE__))
-MECHANIZE_FIX = File.expand_path("fly_parser/mechanize_fix", File.dirname(__FILE__))
 Pry.config.print = proc { |output, value| output.puts value.ai }
@@ -24,17 +23,15 @@ def require_all(path)
 end
 unless defined? Rails
-  require BASE_PATH
+  Dir[BASE_PATH].each do |base_file|
+    require base_file
+  end
   Dir.chdir RAILS_ROOT
   require RAILS_BOOT_PATH
   require RAILS_CONFIG_PATH
   require_all 'fly_parser/sources'
 end
-# fix mechanize by monkey-patching :)
-require MECHANIZE_FIX
 module Parser
   class << self
     # Get HTTP Source
@@ -114,6 +111,10 @@ module Parser
             item["parser"] = Parser::News.new(item["file"], {type: :file, source: source})
           end
         end
+      when "news-nl"
+        source["items"].each do |item|
+          item["parser"] = Parser::NewsNl.new(item["url"], source: source)
+        end
       end
     end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: fly_parser
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.6
 platform: ruby
 authors:
 - Ruslan Korolev
@@ -123,8 +123,10 @@ files:
 - lib/fly_parser/sources/astrology.rb
 - lib/fly_parser/sources/exercise.rb
 - lib/fly_parser/sources/fitness.rb
+- lib/fly_parser/sources/news-nl.rb
 - lib/fly_parser/sources/news.rb
 - lib/fly_parser/sources/sport.rb
+- lib/fly_parser/xml_base.rb
 homepage: http://rubygems.org
 licenses:
 - MIT