fly_parser 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a4b60890098babea5cdf0a19b2ab7f72207bce93
4
- data.tar.gz: a5cf7fdba982c30f622f2c048ea5ef08d6a4565a
3
+ metadata.gz: 04dc5a5ee6aefcaad08a66b4d86d418d2e2d6621
4
+ data.tar.gz: 8145b7beb20e9b7047c2422e3172a14b25218267
5
5
  SHA512:
6
- metadata.gz: 4d9bfcfbc33a83acbf053cca55206f27377a1f6ab62ead285def08fc43ef659ea61aee533c5c743b21ee06b8d7c22e79b77942370ce76ace69270e14f359f5fd
7
- data.tar.gz: f4b199bf649c121cbf1883e00acf451742dd25830076bc3b55a62258e149d1c314026e447580a3c8e2eab3d42b6648e5b9adeb763056aa3bec10322c62d6c288
6
+ metadata.gz: 4283c77559952bdcc2ed404e9b629c0263b806d9d0a776fed4d198674b6e92fb5601f65942fb9b29297fc7e2a42599ceba6740f2fcedcbce2bda0ec1432f9e0b
7
+ data.tar.gz: 37b9f632c4a943defa36e6d5d6a666ed3d3735379d8e0221b10afb881e64d4df096b92f9fac75d215a7cfe7cf833b502713feba5c298212385fe6daf2a21eed4
@@ -0,0 +1,41 @@
# Factories that attach a parser instance to every item of a configured source.
#
# Add new parser sources here: define a public method named after the source
# type (for example +news_fr+) that takes the source hash and returns a
# callable accepting a single item hash. Calling +enable_<name>(source)+ then
# applies that callable to each entry of <tt>source["items"]</tt>.
module Enable
  # Prefix that turns a factory name into its "apply to every item" form.
  ENABLE_PREFIX = "enable_".freeze

  # @param source [Hash] source configuration passed through to the parsers
  # @return [Proc] assigns a Parser::Exercise to items whose "type" is
  #   "exercises" and a Parser::Fitness to every other item
  def fitness(source)
    lambda do |item|
      parser_class = item["type"] == "exercises" ? Parser::Exercise : Parser::Fitness
      item["parser"] = parser_class.new(item["url"], source: source)
    end
  end

  # @param source [Hash] source configuration passed through to the parser
  # @return [Proc] assigns a Parser::News built from the item's "url"
  def news(source)
    lambda { |item| item["parser"] = Parser::News.new(item["url"], source: source) }
  end

  # @param source [Hash] source configuration passed through to the parser
  # @return [Proc] assigns a file-based Parser::News built from the item's "file"
  #
  # NOTE(review): the previous init_parser only built these parsers when
  # source["enabled"] was truthy; that guard is not applied here - confirm
  # the caller filters disabled sources.
  def local(source)
    lambda { |item| item["parser"] = Parser::News.new(item["file"], type: :file, source: source) }
  end

  # @param source [Hash] source configuration passed through to the parser
  # @return [Proc] assigns a Parser::NewsNl built from the item's "url"
  def news_nl(source)
    lambda { |item| item["parser"] = Parser::NewsNl.new(item["url"], source: source) }
  end

  # @param source [Hash] source configuration passed through to the parser
  # @return [Proc] assigns a Parser::NewsFr built from the item's "url"
  def news_fr(source)
    lambda { |item| item["parser"] = Parser::NewsFr.new(item["url"], source: source) }
  end

  # Handles +enable_<factory>(source)+: builds the callable with
  # +<factory>(source)+ and runs it over the source's items.
  #
  # Only publicly callable factories are dispatched, so a source type taken
  # from configuration cannot reach private Kernel methods such as +system+.
  #
  # @raise [RuntimeError] when the name is not an +enable_+ call for a known
  #   factory
  def method_missing(meth, *args)
    name = meth.to_s
    factory = factory_name(name)
    raise "Unknown method #{name} in Enable class, sorry !" unless factory

    iterate_sources(*args, public_send(factory, *args))
  end

  # Keeps +respond_to?+ consistent with what method_missing accepts.
  def respond_to_missing?(meth, include_private = false)
    !factory_name(meth.to_s).nil? || super
  end

  # Applies +block+ to each entry of <tt>source["items"]</tt>; a source
  # without items is treated as empty.
  #
  # @param source [Hash] source configuration
  # @param block [Proc] callable invoked with each item
  def iterate_sources(source, block)
    (source["items"] || []).each(&block)
  end

  private

  # Returns the factory method name behind an +enable_*+ call, or nil when
  # the name carries no prefix, has an empty or nested-prefix suffix, or does
  # not name a publicly callable method.
  def factory_name(name)
    return nil unless name.start_with?(ENABLE_PREFIX)

    candidate = name[ENABLE_PREFIX.length..-1]
    return nil if candidate.empty? || candidate.start_with?(ENABLE_PREFIX)

    respond_to?(candidate) ? candidate : nil
  end
end
@@ -0,0 +1,39 @@
require 'open-uri'

module Parser
  # Parser for the news-fr source: reads the <item> entries of the feed held
  # in @source, downloads each linked article page and extracts its title,
  # body HTML and poster image.
  class NewsFr < XmlBase
    # Poster image inside the article body.
    POSTER_SELECTOR = '.article-long figure.img img'.freeze
    # Container holding the article text.
    BODY_SELECTOR = '.article-long .bd'.freeze
    # Elements stripped from the article body before it is serialized.
    STRIPPED_SELECTORS = ['.modification', 'script', '.ft', '.twitter-tweet'].freeze

    def initialize(source, options = {})
      super
    end

    # Builds one entry per feed item. No publication-date filtering is
    # applied; every <item> is fetched. Items without a usable link or
    # without a poster image inside the article are skipped.
    #
    # @return [Array<Hash>] hashes with :title, :content and :poster_image
    def parse_all
      @source.search('//item').map { |item| parse_item(item) }.compact
    end

    private

    # Converts one feed item into a result hash, or nil when it must be skipped.
    def parse_item(item)
      # The URL is read from the text node following <link>, not from the
      # element's own content.
      link = item.xpath('link/following-sibling::text()[1]').first
      return nil if link.nil?

      page = fetch_page(link.text.strip)
      return nil if page.nil?

      # Guard on the same selector that is dereferenced, so a figure outside
      # the article body cannot cause a nil error.
      poster = page.search(POSTER_SELECTOR).first
      return nil if poster.nil? || poster['src'].nil?

      title = item.xpath('title/text()').text
      { title: title, content: article_html(page) + copyright_html, poster_image: poster['src'] }
    end

    # Downloads and parses an article page. Only http(s) URLs are opened:
    # the link comes from the remote feed, and passing it to Kernel#open
    # would let a value starting with "|" run a shell command.
    #
    # @return [Nokogiri::HTML::Document, nil] nil for malformed or non-http links
    def fetch_page(url)
      uri = URI.parse(url)
      return nil unless uri.is_a?(URI::HTTP)

      Nokogiri::HTML(uri.open)
    rescue URI::InvalidURIError
      nil
    end

    # Cleans the article body and returns its inner HTML.
    def article_html(page)
      body = page.search(BODY_SELECTOR)
      STRIPPED_SELECTORS.each { |selector| body.search(selector).remove }
      body.search('a').remove_attr('href')
      # Demote headings by renaming the elements themselves; a textual
      # substitution would also rewrite "h1"/"h2"/"h3" inside text and
      # attribute values.
      body.search('h1, h2, h3').each { |heading| heading.name = 'h4' }
      body.inner_html
    end

    # Attribution paragraph appended to every article.
    def copyright_html
      "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
    end
  end
end
data/lib/fly_parser.rb CHANGED
@@ -34,6 +34,7 @@ end
34
34
 
35
35
  module Parser
36
36
  class << self
37
+ include Enable if defined? Enable
37
38
  # Get HTTP Source
38
39
  def http(url)
39
40
  Nokogiri::HTML(open(url))
@@ -41,6 +42,7 @@ module Parser
41
42
 
42
43
  def connect(url)
43
44
  agent = Mechanize.new
45
+ agent.pluggable_parser.default = Mechanize::Page
44
46
  agent.get(url)
45
47
  end
46
48
 
@@ -93,29 +95,10 @@ module Parser
93
95
  File.read(LOGO_PATH)
94
96
  end
95
97
 
96
- # choose parser for source here
97
98
  def init_parser(source)
98
- case source["source"]
99
- when "fitness"
100
- source["items"].each do |item|
101
- item["parser"] = Parser::Exercise.new(item["url"], source: source) and next if item["type"] == "exercises"
102
- item["parser"] = Parser::Fitness.new(item["url"], source: source)
103
- end
104
- when "news"
105
- source["items"].each do |item|
106
- item["parser"] = Parser::News.new(item["url"], source: source)
107
- end
108
- when "local"
109
- if source["enabled"]
110
- source["items"].each do |item|
111
- item["parser"] = Parser::News.new(item["file"], {type: :file, source: source})
112
- end
113
- end
114
- when "news-nl"
115
- source["items"].each do |item|
116
- item["parser"] = Parser::NewsNl.new(item["url"], source: source)
117
- end
118
- end
99
+ source_type = source["source"].gsub('-', '_')
100
+ prefix = "enable_"
101
+ send(prefix + source_type, source)
119
102
  end
120
103
 
121
104
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fly_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ruslan Korolev
@@ -118,11 +118,13 @@ files:
118
118
  - lib/fly_parser.rb
119
119
  - lib/fly_parser/base.rb
120
120
  - lib/fly_parser/config_example.yml
121
+ - lib/fly_parser/enable_source.rb
121
122
  - lib/fly_parser/logo.txt
122
123
  - lib/fly_parser/mechanize_fix.rb
123
124
  - lib/fly_parser/sources/astrology.rb
124
125
  - lib/fly_parser/sources/exercise.rb
125
126
  - lib/fly_parser/sources/fitness.rb
127
+ - lib/fly_parser/sources/news-fr.rb
126
128
  - lib/fly_parser/sources/news-nl.rb
127
129
  - lib/fly_parser/sources/news.rb
128
130
  - lib/fly_parser/sources/sport.rb