fly_parser 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/fly_parser/enable_source.rb +41 -0
 - data/lib/fly_parser/sources/news-fr.rb +39 -0
 - data/lib/fly_parser.rb +5 -22
 - metadata +3 -1
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 04dc5a5ee6aefcaad08a66b4d86d418d2e2d6621
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 8145b7beb20e9b7047c2422e3172a14b25218267
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 4283c77559952bdcc2ed404e9b629c0263b806d9d0a776fed4d198674b6e92fb5601f65942fb9b29297fc7e2a42599ceba6740f2fcedcbce2bda0ec1432f9e0b
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 37b9f632c4a943defa36e6d5d6a666ed3d3735379d8e0221b10afb881e64d4df096b92f9fac75d215a7cfe7cf833b502713feba5c298212385fe6daf2a21eed4
         
     | 
| 
         @@ -0,0 +1,41 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # add new parser source here
         
     | 
| 
      
 2 
     | 
    
         
            +
            module Enable
         
     | 
| 
      
 3 
     | 
    
         
            +
              def fitness(source)
         
     | 
| 
      
 4 
     | 
    
         
            +
                lambda do |item|
         
     | 
| 
      
 5 
     | 
    
         
            +
                  item["parser"] = Parser::Exercise.new(item["url"], source: source) and next if item["type"] == "exercises"
         
     | 
| 
      
 6 
     | 
    
         
            +
                  item["parser"] = Parser::Fitness.new(item["url"], source: source)
         
     | 
| 
      
 7 
     | 
    
         
            +
                end
         
     | 
| 
      
 8 
     | 
    
         
            +
              end
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
              def news(source)
         
     | 
| 
      
 11 
     | 
    
         
            +
                lambda { |item| item["parser"] = Parser::News.new(item["url"], source: source) }
         
     | 
| 
      
 12 
     | 
    
         
            +
              end
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
              def local(source)
         
     | 
| 
      
 15 
     | 
    
         
            +
                lambda { |item| item["parser"] = Parser::News.new(item["file"], {type: :file, source: source}) }
         
     | 
| 
      
 16 
     | 
    
         
            +
              end
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
              def news_nl(source)
         
     | 
| 
      
 19 
     | 
    
         
            +
                lambda { |item| item["parser"] = Parser::NewsNl.new(item["url"], source: source) }
         
     | 
| 
      
 20 
     | 
    
         
            +
              end
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
              def news_fr(source)
         
     | 
| 
      
 23 
     | 
    
         
            +
                lambda {|item| item["parser"] = Parser::NewsFr.new(item["url"], source: source)}
         
     | 
| 
      
 24 
     | 
    
         
            +
              end
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
              def method_missing(meth, *args)
         
     | 
| 
      
 27 
     | 
    
         
            +
                prefix = "enable_"
         
     | 
| 
      
 28 
     | 
    
         
            +
                meth = meth.to_s
         
     | 
| 
      
 29 
     | 
    
         
            +
                if meth.start_with?(prefix)
         
     | 
| 
      
 30 
     | 
    
         
            +
                  meth_name = meth.split(prefix).last
         
     | 
| 
      
 31 
     | 
    
         
            +
                  proc = send(meth_name, *args)
         
     | 
| 
      
 32 
     | 
    
         
            +
                  iterate_sources(*args, proc)
         
     | 
| 
      
 33 
     | 
    
         
            +
                else
         
     | 
| 
      
 34 
     | 
    
         
            +
                  raise "Unknown method #{meth} in Enable class, ssory !"
         
     | 
| 
      
 35 
     | 
    
         
            +
                end
         
     | 
| 
      
 36 
     | 
    
         
            +
              end
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
              def iterate_sources(source, block)
         
     | 
| 
      
 39 
     | 
    
         
            +
                source["items"].each(&block)
         
     | 
| 
      
 40 
     | 
    
         
            +
              end
         
     | 
| 
      
 41 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,39 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Parser
         
     | 
| 
      
 2 
     | 
    
         
            +
              class NewsFr < XmlBase
         
     | 
| 
      
 3 
     | 
    
         
            +
                def initialize(source, options = {})
         
     | 
| 
      
 4 
     | 
    
         
            +
                  super
         
     | 
| 
      
 5 
     | 
    
         
            +
                end
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
                def parse_all
         
     | 
| 
      
 8 
     | 
    
         
            +
                  items = @source.search('//item')
         
     | 
| 
      
 9 
     | 
    
         
            +
                  # # last_date = Time.now - 2.years # for dev 2 years
         
     | 
| 
      
 10 
     | 
    
         
            +
                  # # select! or reject! is not exists for Nokogiri#NodeSet
         
     | 
| 
      
 11 
     | 
    
         
            +
                  # # items = items.select {|item| item.xpath('pubDate').first.content() > last_date }
         
     | 
| 
      
 12 
     | 
    
         
            +
                  items.map do |item|
         
     | 
| 
      
 13 
     | 
    
         
            +
                    title = item.xpath('title/text()').text()
         
     | 
| 
      
 14 
     | 
    
         
            +
                    date = item.xpath('pubdate').first.content()
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
                    link = item.xpath('link/following-sibling::text()[1]').first
         
     | 
| 
      
 17 
     | 
    
         
            +
                    page = Nokogiri::HTML(open(link))
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
                    next if page.search('figure.img img').first.nil?
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
                    poster_image = page.search('.article-long figure.img img').first.attributes['src'].value
         
     | 
| 
      
 22 
     | 
    
         
            +
                    full_desc = page.search('.article-long .bd')
         
     | 
| 
      
 23 
     | 
    
         
            +
                    full_desc.search('.modification').remove()
         
     | 
| 
      
 24 
     | 
    
         
            +
                    full_desc.search('script').remove()
         
     | 
| 
      
 25 
     | 
    
         
            +
                    full_desc.search('.ft').remove()
         
     | 
| 
      
 26 
     | 
    
         
            +
                    full_desc.search('a').remove_attr('href')
         
     | 
| 
      
 27 
     | 
    
         
            +
                    full_desc.search('.twitter-tweet').remove()
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
                    desc = full_desc.inner_html
         
     | 
| 
      
 30 
     | 
    
         
            +
                    desc.gsub! /h2|h1|h3/, 'h4'
         
     | 
| 
      
 31 
     | 
    
         
            +
                    # remove href attributes
         
     | 
| 
      
 32 
     | 
    
         
            +
                    #full_desc = full_desc.text().gsub(/<a href="([a-zA-Z:\/\.\d\-]*)">(.*)<\/a>/,'<a>\2</a>')
         
     | 
| 
      
 33 
     | 
    
         
            +
                    copyright = "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
         
     | 
| 
      
 34 
     | 
    
         
            +
                    content = desc + copyright
         
     | 
| 
      
 35 
     | 
    
         
            +
                    {title: title, content: content, poster_image: poster_image}
         
     | 
| 
      
 36 
     | 
    
         
            +
                  end.compact
         
     | 
| 
      
 37 
     | 
    
         
            +
                end
         
     | 
| 
      
 38 
     | 
    
         
            +
              end
         
     | 
| 
      
 39 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/fly_parser.rb
    CHANGED
    
    | 
         @@ -34,6 +34,7 @@ end 
     | 
|
| 
       34 
34 
     | 
    
         | 
| 
       35 
35 
     | 
    
         
             
            module Parser
         
     | 
| 
       36 
36 
     | 
    
         
             
              class << self
         
     | 
| 
      
 37 
     | 
    
         
            +
                include Enable if defined? Enable
         
     | 
| 
       37 
38 
     | 
    
         
             
                # Get HTTP Source
         
     | 
| 
       38 
39 
     | 
    
         
             
                def http(url)
         
     | 
| 
       39 
40 
     | 
    
         
             
                  Nokogiri::HTML(open(url))
         
     | 
| 
         @@ -41,6 +42,7 @@ module Parser 
     | 
|
| 
       41 
42 
     | 
    
         | 
| 
       42 
43 
     | 
    
         
             
                def connect(url)
         
     | 
| 
       43 
44 
     | 
    
         
             
                  agent = Mechanize.new
         
     | 
| 
      
 45 
     | 
    
         
            +
                  agent.pluggable_parser.default = Mechanize::Page
         
     | 
| 
       44 
46 
     | 
    
         
             
                  agent.get(url)
         
     | 
| 
       45 
47 
     | 
    
         
             
                end
         
     | 
| 
       46 
48 
     | 
    
         | 
| 
         @@ -93,29 +95,10 @@ module Parser 
     | 
|
| 
       93 
95 
     | 
    
         
             
                  File.read(LOGO_PATH)
         
     | 
| 
       94 
96 
     | 
    
         
             
                end
         
     | 
| 
       95 
97 
     | 
    
         | 
| 
       96 
     | 
    
         
            -
                # choose parser for source here
         
     | 
| 
       97 
98 
     | 
    
         
             
                def init_parser(source)
         
     | 
| 
       98 
     | 
    
         
            -
                   
     | 
| 
       99 
     | 
    
         
            -
                   
     | 
| 
       100 
     | 
    
         
            -
             
     | 
| 
       101 
     | 
    
         
            -
                      item["parser"] = Parser::Exercise.new(item["url"], source: source) and next if item["type"] == "exercises"
         
     | 
| 
       102 
     | 
    
         
            -
                      item["parser"] = Parser::Fitness.new(item["url"], source: source)
         
     | 
| 
       103 
     | 
    
         
            -
                    end
         
     | 
| 
       104 
     | 
    
         
            -
                  when "news"
         
     | 
| 
       105 
     | 
    
         
            -
                    source["items"].each do |item|
         
     | 
| 
       106 
     | 
    
         
            -
                      item["parser"] = Parser::News.new(item["url"], source: source)
         
     | 
| 
       107 
     | 
    
         
            -
                    end
         
     | 
| 
       108 
     | 
    
         
            -
                  when "local"
         
     | 
| 
       109 
     | 
    
         
            -
                    if source["enabled"]
         
     | 
| 
       110 
     | 
    
         
            -
                      source["items"].each do |item|
         
     | 
| 
       111 
     | 
    
         
            -
                        item["parser"] = Parser::News.new(item["file"], {type: :file, source: source})
         
     | 
| 
       112 
     | 
    
         
            -
                      end
         
     | 
| 
       113 
     | 
    
         
            -
                    end
         
     | 
| 
       114 
     | 
    
         
            -
                  when "news-nl"
         
     | 
| 
       115 
     | 
    
         
            -
                    source["items"].each do |item|
         
     | 
| 
       116 
     | 
    
         
            -
                      item["parser"] = Parser::NewsNl.new(item["url"], source: source)
         
     | 
| 
       117 
     | 
    
         
            -
                    end
         
     | 
| 
       118 
     | 
    
         
            -
                  end
         
     | 
| 
      
 99 
     | 
    
         
            +
                  source_type = source["source"].gsub('-', '_')
         
     | 
| 
      
 100 
     | 
    
         
            +
                  prefix = "enable_"
         
     | 
| 
      
 101 
     | 
    
         
            +
                  send(prefix + source_type, source)
         
     | 
| 
       119 
102 
     | 
    
         
             
                end
         
     | 
| 
       120 
103 
     | 
    
         | 
| 
       121 
104 
     | 
    
         
             
              end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: fly_parser
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.0.6
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.0.7
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Ruslan Korolev
         
     | 
| 
         @@ -118,11 +118,13 @@ files: 
     | 
|
| 
       118 
118 
     | 
    
         
             
            - lib/fly_parser.rb
         
     | 
| 
       119 
119 
     | 
    
         
             
            - lib/fly_parser/base.rb
         
     | 
| 
       120 
120 
     | 
    
         
             
            - lib/fly_parser/config_example.yml
         
     | 
| 
      
 121 
     | 
    
         
            +
            - lib/fly_parser/enable_source.rb
         
     | 
| 
       121 
122 
     | 
    
         
             
            - lib/fly_parser/logo.txt
         
     | 
| 
       122 
123 
     | 
    
         
             
            - lib/fly_parser/mechanize_fix.rb
         
     | 
| 
       123 
124 
     | 
    
         
             
            - lib/fly_parser/sources/astrology.rb
         
     | 
| 
       124 
125 
     | 
    
         
             
            - lib/fly_parser/sources/exercise.rb
         
     | 
| 
       125 
126 
     | 
    
         
             
            - lib/fly_parser/sources/fitness.rb
         
     | 
| 
      
 127 
     | 
    
         
            +
            - lib/fly_parser/sources/news-fr.rb
         
     | 
| 
       126 
128 
     | 
    
         
             
            - lib/fly_parser/sources/news-nl.rb
         
     | 
| 
       127 
129 
     | 
    
         
             
            - lib/fly_parser/sources/news.rb
         
     | 
| 
       128 
130 
     | 
    
         
             
            - lib/fly_parser/sources/sport.rb
         
     |